diff --git a/arm/Makefile b/arm/Makefile
index e1d37985d..16edebe65 100644
--- a/arm/Makefile
+++ b/arm/Makefile
@@ -385,7 +385,9 @@ UNOPT_OBJ = p256/unopt/bignum_montmul_p256_base.o \
             fastmul/unopt/bignum_mul_8_16_base.o \
             fastmul/unopt/bignum_sqr_8_16_base.o

-OBJ = $(POINT_OBJ) $(BIGNUM_OBJ)
+AES_XTS_OBJ = aes-xts/aes-xts-armv8.o
+
+OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) $(AES_XTS_OBJ)

 # Tutorial assembly files

@@ -398,9 +400,9 @@ TUTORIAL_OBJ = $(TUTORIAL_PROOFS:.ml=.o) tutorial/rel_loop2.o tutorial/rel_simp2

 # x18 should not be used for Apple platforms. Check this using grep.

 %.o : %.S
-	cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -o $@ -
+	cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -march=armv8-a+crypto -o $@ -
 	$(OBJDUMP) $@ | ( ( ! grep --ignore-case -E 'w18|[^0]x18' ) || ( rm $@ ; exit 1 ) )
-	cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ -
+	cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -march=armv8-a+crypto -o $@ -

 libs2nbignum.a: $(OBJ) ; ar -rc libs2nbignum.a $(OBJ)

diff --git a/arm/aes-xts/aes-xts-armv8.S b/arm/aes-xts/aes-xts-armv8.S
new file mode 100644
index 000000000..25701e691
--- /dev/null
+++ b/arm/aes-xts/aes-xts-armv8.S
@@ -0,0 +1,2031 @@
+#include "_internal_s2n_bignum.h"
+
+# The following XTS encrypt routine is taken from a MacBook M3 build folder,
+# after moving around some instructions.
+_aes_hw_xts_encrypt:
+ # AARCH64_VALID_CALL_TARGET
+ cmp x2,#16
+ // If the input size is not exactly 16 bytes, jump to the big-size path.
+ b.ne Lxts_enc_big_size
+ // Encrypt the iv with key2, as the first XEX iv.
+ ldr w6,[x4,#240]
+ ld1 {v0.16b},[x4],#16
+ ld1 {v6.16b},[x5]
+ sub w6,w6,#2
+ ld1 {v1.16b},[x4],#16
+
+Loop_enc_iv_enc:
+ aese v6.16b,v0.16b
+ aesmc v6.16b,v6.16b
+ ld1 {v0.4s},[x4],#16
+ subs w6,w6,#2
+ aese v6.16b,v1.16b
+ aesmc v6.16b,v6.16b
+ ld1 {v1.4s},[x4],#16
+ b.gt Loop_enc_iv_enc
+
+ aese v6.16b,v0.16b
+ aesmc v6.16b,v6.16b
+ ld1 {v0.4s},[x4]
+ aese v6.16b,v1.16b
+ eor v6.16b,v6.16b,v0.16b
+
+ ld1 {v0.16b},[x0]
+ eor v0.16b,v6.16b,v0.16b
+
+ ldr w6,[x3,#240]
+ ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule...
+
+ aese v0.16b,v28.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule...
+ aese v0.16b,v29.16b
+ aesmc v0.16b,v0.16b
+ subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing
+ b.eq Lxts_128_enc
+Lxts_enc_round_loop:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.4s},[x3],#16 // load key schedule...
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v17.4s},[x3],#16 // load key schedule...
+ subs w6,w6,#2 // bias
+ b.gt Lxts_enc_round_loop
+Lxts_128_enc:
+ ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule...
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule...
+ aese v0.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule...
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v7.4s},[x3]
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v23.16b
+ eor v0.16b,v0.16b,v7.16b
+ eor v0.16b,v0.16b,v6.16b
+ st1 {v0.16b},[x1]
+ b Lxts_enc_final_abort
+
+.align 4
+Lxts_enc_big_size:
+ // Encrypt input size > 16 bytes
+ stp x19,x20,[sp,#-64]!
+ stp x21,x22,[sp,#48]
+ stp d8,d9,[sp,#32]
+ stp d10,d11,[sp,#16]
+
+ // tailcnt stores the tail value of length % 16.
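+ // A note on the interface (an assumption, following the AWS-LC
+ // aes_hw_xts_* contract): x0=in, x1=out, x2=byte length, x3=key1,
+ // x4=key2, x5=iv. The split below is length = 16*n + tailcnt with
+ // 0 <= tailcnt < 16; a nonzero tailcnt is finished by ciphertext
+ // stealing at Lxts_enc_done.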
+ and x21,x2,#0xf + and x2,x2,#-16 // len &= 0x1..110000, now divisible by 16 + subs x2,x2,#16 + mov x8,#16 + b.lo Lxts_abort // if !(len > 16): error + csel x8,xzr,x8,eq // if (len == 16): step = 0 + + // Firstly, encrypt the iv with key2, as the first iv of XEX. + ldr w6,[x4,#240] + ld1 {v0.4s},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.4s},[x4],#16 + +Loop_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt Loop_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + // The iv for second block + // x9- iv(low), x10 - iv(high) + // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d8,x9 + fmov v8.d[1],x10 + + ldr w5,[x3,#240] // next starting point + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + sub w5,w5,#2 + add x7,x3,#32 + mov w6,w5 + + // Encryption +Lxts_enc: + ld1 {v24.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v28.16b,v0.16b,v0.16b + orr v27.16b,v24.16b,v24.16b + orr v29.16b,v24.16b,v24.16b + b.lo Lxts_inner_enc_tail // when input size % 5 = 1 or 2 + // (with tail or not) + eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv + eor v24.16b,v24.16b,v8.16b + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d9,x9 + fmov v9.d[1],x10 + + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + eor v27.16b,v24.16b,v9.16b // the third block + eor v24.16b,v24.16b,v9.16b + cmp x2,#32 + b.lo Lxts_outer_enc_tail + + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d10,x9 + fmov v10.d[1],x10 + + ld1 {v25.16b},[x0],#16 + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v26.16b},[x0],#16 + eor v25.16b,v25.16b,v10.16b // the fourth block + eor v26.16b,v26.16b,v11.16b + sub x2,x2,#32 // bias + mov w6,w5 + // b .Loop5x_xts_enc + +.align 4 +Loop5x_xts_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt Loop5x_xts_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + subs x2,x2,#0x50 // because Lxts_enc_tail4x + + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese 
v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v1.16b,v18.16b + aesmc v1.16b,v1.16b + aese v24.16b,v18.16b + aesmc v24.16b,v24.16b + aese v25.16b,v18.16b + aesmc v25.16b,v25.16b + aese v26.16b,v18.16b + aesmc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because Lxts_enc_tail4x + + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + aese v1.16b,v19.16b + aesmc v1.16b,v1.16b + aese v24.16b,v19.16b + aesmc v24.16b,v24.16b + aese v25.16b,v19.16b + aesmc v25.16b,v25.16b + aese v26.16b,v19.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v25.16b,v20.16b + aesmc v25.16b,v25.16b + aese v26.16b,v20.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v25.16b,v21.16b + aesmc v25.16b,v25.16b + aese v26.16b,v21.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v25.16b,v22.16b + aesmc v25.16b,v25.16b + aese v26.16b,v22.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v23.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + aese v25.16b,v23.16b + aese v26.16b,v23.16b + + eor v4.16b,v7.16b,v6.16b + // aese v0.16b,v23.16b + // The iv for first block of one iteration + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v7.16b,v8.16b + ld1 {v2.16b},[x0],#16 + // aese v1.16b,v23.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d8,x9 + fmov v8.d[1],x10 + eor v17.16b,v7.16b,v9.16b + ld1 {v3.16b},[x0],#16 + // aese v24.16b,v23.16b + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d9,x9 + fmov v9.d[1],x10 + eor v30.16b,v7.16b,v10.16b + ld1 {v27.16b},[x0],#16 + // aese v25.16b,v23.16b + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d10,x9 + fmov v10.d[1],x10 + eor v31.16b,v7.16b,v11.16b + ld1 {v28.16b},[x0],#16 + // aese v26.16b,v23.16b + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v29.16b},[x0],#16 + cbz x6,Lxts_enc_tail4x +// vld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v0.16b,v2.16b,v6.16b + eor v5.16b,v5.16b,v1.16b + eor v1.16b,v3.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v24.16b,v27.16b,v9.16b + eor v30.16b,v30.16b,v25.16b + eor v25.16b,v28.16b,v10.16b + eor v31.16b,v31.16b,v26.16b + //vst1 {v4.16b},[x1],#16 + eor v26.16b,v29.16b,v11.16b + //vst1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v4.16b,v5.16b},[x1],#32 + st1 {v17.16b},[x1],#16 +// vld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] +// vst1 {v30.16b},[x1],#16 +// vst1 {v31.16b},[x1],#16 + st1 {v30.16b,v31.16b},[x1],#32 + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + 
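+ // A note on the loop accounting: x2 was biased by -0x50 above and no
+ // intervening instruction writes the flags, so b.hs (unsigned >=)
+ // takes another 5-block pass only while at least five blocks remain.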
b.hs Loop5x_xts_enc + + + // If left 4 blocks, borrow the five block's processing. + // This means if (x2 + 1 block) == 0, which is the case + // when input size % 5 = 4, continue processing and do + // another iteration in Loop5x_xts_enc which will exit from + // cbz x6,.Lxts_enc_tail4x. + // Otherwise, this is the end of the loop continue processing + // 0, 1, 2 or 3 blocks (with or without tail) starting at + // Loop5x_enc_after + cmn x2,#0x10 + b.ne Loop5x_enc_after + orr v11.16b,v10.16b,v10.16b + orr v10.16b,v9.16b,v9.16b + orr v9.16b,v8.16b,v8.16b + orr v8.16b,v6.16b,v6.16b + fmov x9,d11 + fmov x10,v11.d[1] + eor v0.16b,v6.16b,v2.16b + eor v1.16b,v8.16b,v3.16b + eor v24.16b,v27.16b,v9.16b + eor v25.16b,v28.16b,v10.16b + eor v26.16b,v29.16b,v11.16b + b.eq Loop5x_xts_enc + +Loop5x_enc_after: + add x2,x2,#0x50 + cbz x2,Lxts_enc_done // no blocks left + + add w6,w5,#2 + subs x2,x2,#0x30 + b.lo Lxts_inner_enc_tail // 1 or 2 blocks left + // (with tail or not) + + eor v0.16b,v6.16b,v27.16b // 3 blocks left + eor v1.16b,v8.16b,v28.16b + eor v24.16b,v29.16b,v9.16b + b Lxts_outer_enc_tail + +.align 4 +Lxts_enc_tail4x: + add x0,x0,#16 + eor v5.16b,v1.16b,v5.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v24.16b,v17.16b + st1 {v17.16b},[x1],#16 + eor v30.16b,v25.16b,v30.16b + eor v31.16b,v26.16b,v31.16b + st1 {v30.16b,v31.16b},[x1],#32 + b Lxts_enc_done +.align 4 +Lxts_outer_enc_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt Lxts_outer_enc_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + + eor v17.16b,v9.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + //mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point +// aese v0.16b,v17.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v17.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v17.16b +// aesmc v24.16b,v24.16b +// veor v17.16b,v9.16b,v7.16b + + add x6,x6,#0x20 + add x0,x0,x6 + mov x7,x3 + +// aese v0.16b,v20.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v20.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v20.16b +// aesmc v24.16b,v24.16b +// aese v0.16b,v21.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v21.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v21.16b +// aesmc v24.16b,v24.16b +// aese v0.16b,v22.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v22.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v22.16b +// aesmc v24.16b,v24.16b +// aese v0.16b,v23.16b 
+// aese v1.16b,v23.16b
+// aese v24.16b,v23.16b
+ ld1 {v27.16b},[x0],#16
+ add w6,w5,#2
+// vld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ eor v4.16b,v4.16b,v0.16b
+ eor v5.16b,v5.16b,v1.16b
+ eor v24.16b,v24.16b,v17.16b
+// vld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ ld1 {v16.4s,v17.4s},[x7],#32
+// vst1 {v4.16b},[x1],#16
+// vst1 {v5.16b},[x1],#16
+ st1 {v4.16b,v5.16b},[x1],#32
+ st1 {v24.16b},[x1],#16
+ cmn x2,#0x30
+ b.eq Lxts_enc_done
+Lxts_encxor_one:
+ orr v28.16b,v3.16b,v3.16b
+ orr v29.16b,v27.16b,v27.16b
+ nop
+
+Lxts_inner_enc_tail:
+ cmn x2,#0x10
+ eor v1.16b,v28.16b,v6.16b
+ eor v24.16b,v29.16b,v8.16b
+ b.eq Lxts_enc_tail_loop
+ eor v24.16b,v29.16b,v6.16b
+Lxts_enc_tail_loop:
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v16.16b
+ aesmc v24.16b,v24.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v17.16b
+ aesmc v24.16b,v24.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt Lxts_enc_tail_loop
+
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v16.16b
+ aesmc v24.16b,v24.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v17.16b
+ aesmc v24.16b,v24.16b
+ aese v1.16b,v20.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v20.16b
+ aesmc v24.16b,v24.16b
+ cmn x2,#0x20
+ aese v1.16b,v21.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v21.16b
+ aesmc v24.16b,v24.16b
+ eor v5.16b,v6.16b,v7.16b
+ aese v1.16b,v22.16b
+ aesmc v1.16b,v1.16b
+ aese v24.16b,v22.16b
+ aesmc v24.16b,v24.16b
+ eor v17.16b,v8.16b,v7.16b
+ aese v1.16b,v23.16b
+ aese v24.16b,v23.16b
+ b.eq Lxts_enc_one
+ eor v5.16b,v5.16b,v1.16b
+ st1 {v5.16b},[x1],#16
+ eor v17.16b,v17.16b,v24.16b
+ orr v6.16b,v8.16b,v8.16b
+ st1 {v17.16b},[x1],#16
+ fmov x9,d8
+ fmov x10,v8.d[1]
+ mov w19,#0x87
+ extr x22,x10,x10,#32
+ extr x10,x10,x9,#63
+ and w11,w19,w22,asr #31
+ eor x9,x11,x9,lsl #1
+ fmov d6,x9
+ fmov v6.d[1],x10
+ b Lxts_enc_done
+
+Lxts_enc_one:
+ eor v5.16b,v5.16b,v24.16b
+ orr v6.16b,v6.16b,v6.16b
+ st1 {v5.16b},[x1],#16
+ fmov x9,d6
+ fmov x10,v6.d[1]
+ mov w19,#0x87
+ extr x22,x10,x10,#32
+ extr x10,x10,x9,#63
+ and w11,w19,w22,asr #31
+ eor x9,x11,x9,lsl #1
+ fmov d6,x9
+ fmov v6.d[1],x10
+ b Lxts_enc_done
+.align 5
+Lxts_enc_done:
+ // Process the tail block with cipher stealing.
+ tst x21,#0xf
+ b.eq Lxts_abort
+
+ mov x20,x0
+ mov x13,x1
+ sub x1,x1,#16
+.composite_enc_loop:
+ subs x21,x21,#1
+ ldrb w15,[x1,x21]
+ ldrb w14,[x20,x21]
+ strb w15,[x13,x21]
+ strb w14,[x1,x21]
+ b.gt .composite_enc_loop
+Lxts_enc_load_done:
+ ld1 {v26.16b},[x1]
+ eor v26.16b,v26.16b,v6.16b
+
+ // Encrypt the composite block to get the second-to-last ciphertext block
+ ldr w6,[x3,#240] // load rounds
+ ld1 {v0.16b},[x3],#16
+ sub w6,w6,#2
+ ld1 {v1.16b},[x3],#16 // load key schedule...
+Loop_final_enc:
+ aese v26.16b,v0.16b
+ aesmc v26.16b,v26.16b
+ ld1 {v0.4s},[x3],#16
+ subs w6,w6,#2
+ aese v26.16b,v1.16b
+ aesmc v26.16b,v26.16b
+ ld1 {v1.4s},[x3],#16
+ b.gt Loop_final_enc
+
+ aese v26.16b,v0.16b
+ aesmc v26.16b,v26.16b
+ ld1 {v0.4s},[x3]
+ aese v26.16b,v1.16b
+ eor v26.16b,v26.16b,v0.16b
+ eor v26.16b,v26.16b,v6.16b
+ st1 {v26.16b},[x1]
+
+Lxts_abort:
+ ldp x21,x22,[sp,#48]
+ ldp d8,d9,[sp,#32]
+ ldp d10,d11,[sp,#16]
+ ldp x19,x20,[sp],#64
+Lxts_enc_final_abort:
+ ret
+
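+# A note on the tweak update used throughout this file: the recurring
+# extr/and/eor sequence on x9 (tweak low half) and x10 (tweak high half)
+# multiplies the 128-bit tweak by x in GF(2^128) modulo
+# x^128 + x^7 + x^2 + x + 1 (the XTS polynomial, hence the 0x87 constant
+# kept in w19). An equivalent C sketch, for illustration only:
+#
+#   uint64_t carry = hi >> 63;            // bit shifted out of the top
+#   hi = (hi << 1) | (lo >> 63);          // shift the 128-bit tweak left
+#   lo = (lo << 1) ^ (carry ? 0x87 : 0);  // reduce by the polynomial
+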
+
+# Decrypt is taken from
+# https://github.com/aws/aws-lc/blob/804a11b6f965365156b0a8b6d958233e1372a2e2/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S#L1475
+
+aes_hw_xts_decrypt:
+ cmp x2,#16
+ // Original input data size bigger than 16, jump to big size processing.
+ b.ne .Lxts_dec_big_size
+ // Encrypt the iv with key2, as the first XEX iv.
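+ // (the [x4,#240] load below assumes the AWS-LC/OpenSSL AES_KEY layout,
+ // where 240 bytes of round keys are followed by the round count)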
+ ldr w6,[x4,#240] + ld1 {v0.16b},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.16b},[x4],#16 + +.Loop_dec_small_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_dec_small_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + ld1 {v0.16b},[x0] + eor v0.16b,v6.16b,v0.16b + + ldr w6,[x3,#240] + ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... + + aesd v0.16b,v28.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... + aesd v0.16b,v29.16b + aesimc v0.16b,v0.16b + subs w6,w6,#10 // bias + b.eq .Lxts_128_dec +.Lxts_dec_round_loop: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 // bias + b.gt .Lxts_dec_round_loop +.Lxts_128_dec: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + eor v0.16b,v6.16b,v0.16b + st1 {v0.16b},[x1] + b .Lxts_dec_final_abort +.Lxts_dec_big_size: + stp x19,x20,[sp,#-64]! + stp x21,x22,[sp,#48] + stp d8,d9,[sp,#32] + stp d10,d11,[sp,#16] + + and x21,x2,#0xf + and x2,x2,#-16 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lxts_dec_abort + + // Encrypt the iv with key2, as the first XEX iv + ldr w6,[x4,#240] + ld1 {v0.16b},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.16b},[x4],#16 + +.Loop_dec_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_dec_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + // The iv for second block + // x9- iv(low), x10 - iv(high) + // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + + ldr w5,[x3,#240] // load rounds number + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 // load key schedule... 
+ ld1 {v20.4s,v21.4s},[x7],#32
+ ld1 {v22.4s,v23.4s},[x7],#32
+ ld1 {v7.4s},[x7]
+
+ // The iv for fourth block
+ extr x22,x10,x10,#32
+ extr x10,x10,x9,#63
+ and w11,w19,w22,asr #31
+ eor x9,x11,x9,lsl #1
+ fmov d10,x9
+ fmov v10.d[1],x10
+
+ add x7,x3,#32
+ mov w6,w5
+ b .Lxts_dec
+
+ // Decryption
+.align 5
+.Lxts_dec:
+ tst x21,#0xf
+ b.eq .Lxts_dec_begin
+ subs x2,x2,#16
+ csel x8,xzr,x8,eq
+ ld1 {v0.16b},[x0],#16
+ b.lo .Lxts_done
+ sub x0,x0,#16
+.Lxts_dec_begin:
+ ld1 {v0.16b},[x0],x8
+ subs x2,x2,#32 // bias
+ add w6,w5,#2
+ orr v3.16b,v0.16b,v0.16b
+ orr v1.16b,v0.16b,v0.16b
+ orr v28.16b,v0.16b,v0.16b
+ ld1 {v24.16b},[x0],#16
+ orr v27.16b,v24.16b,v24.16b
+ orr v29.16b,v24.16b,v24.16b
+ b.lo .Lxts_inner_dec_tail
+ eor v0.16b,v0.16b,v6.16b // before decryption, xor with iv
+ eor v24.16b,v24.16b,v8.16b
+
+ orr v1.16b,v24.16b,v24.16b
+ ld1 {v24.16b},[x0],#16
+ orr v2.16b,v0.16b,v0.16b
+ orr v3.16b,v1.16b,v1.16b
+ eor v27.16b,v24.16b,v9.16b // third block xor with third iv
+ eor v24.16b,v24.16b,v9.16b
+ cmp x2,#32
+ b.lo .Lxts_outer_dec_tail
+
+ ld1 {v25.16b},[x0],#16
+
+ // The iv for fifth block
+ extr x22,x10,x10,#32
+ extr x10,x10,x9,#63
+ and w11,w19,w22,asr #31
+ eor x9,x11,x9,lsl #1
+ fmov d11,x9
+ fmov v11.d[1],x10
+
+ ld1 {v26.16b},[x0],#16
+ eor v25.16b,v25.16b,v10.16b // the fourth block
+ eor v26.16b,v26.16b,v11.16b
+ sub x2,x2,#32 // bias
+ mov w6,w5
+ b .Loop5x_xts_dec
+
+.align 4
+.Loop5x_xts_dec:
+ aesd v0.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v16.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v16.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v16.4s},[x7],#16 // load key schedule...
+ subs w6,w6,#2
+ aesd v0.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v17.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v17.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v17.4s},[x7],#16 // load key schedule...
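+ // Each pass applies one pair of round keys (v16 then v17) to all five
+ // lanes and pre-loads the next pair; the last seven round keys
+ // (v18-v23, v7) are applied unrolled after the loop exits.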
+ b.gt .Loop5x_xts_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + subs x2,x2,#0x50 // because .Lxts_dec_tail4x + + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v18.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v18.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v18.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v18.16b + aesimc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lxts_dec_tail4x + + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v19.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v19.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v19.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v19.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v20.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v20.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v21.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v21.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v22.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v22.16b + aesimc v26.16b,v26.16b + + eor v4.16b,v7.16b,v6.16b + aesd v0.16b,v23.16b + // The iv for first block of next iteration. 
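+ // (the same GF(2^128) doubling as everywhere else; see the note between
+ // the encrypt and decrypt routines)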
+ extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v7.16b,v8.16b + ld1 {v2.16b},[x0],#16 + aesd v1.16b,v23.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + eor v17.16b,v7.16b,v9.16b + ld1 {v3.16b},[x0],#16 + aesd v24.16b,v23.16b + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + eor v30.16b,v7.16b,v10.16b + ld1 {v27.16b},[x0],#16 + aesd v25.16b,v23.16b + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d10,x9 + fmov v10.d[1],x10 + eor v31.16b,v7.16b,v11.16b + ld1 {v28.16b},[x0],#16 + aesd v26.16b,v23.16b + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v29.16b},[x0],#16 + cbz x6,.Lxts_dec_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v0.16b,v2.16b,v6.16b + eor v5.16b,v5.16b,v1.16b + eor v1.16b,v3.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v24.16b,v27.16b,v9.16b + eor v30.16b,v30.16b,v25.16b + eor v25.16b,v28.16b,v10.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + eor v26.16b,v29.16b,v11.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_xts_dec + + cmn x2,#0x10 + b.ne .Loop5x_dec_after + // If x2(x2) equal to -0x10, the left blocks is 4. + // After specially processing, utilize the five blocks processing again. + // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 
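+ // (the extra fifth lane decrypts a duplicated block whose output, v0,
+ // is simply never stored by .Lxts_dec_tail4x)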
+ orr v11.16b,v10.16b,v10.16b + orr v10.16b,v9.16b,v9.16b + orr v9.16b,v8.16b,v8.16b + orr v8.16b,v6.16b,v6.16b + fmov x9,d11 + fmov x10,v11.d[1] + eor v0.16b,v6.16b,v2.16b + eor v1.16b,v8.16b,v3.16b + eor v24.16b,v27.16b,v9.16b + eor v25.16b,v28.16b,v10.16b + eor v26.16b,v29.16b,v11.16b + b.eq .Loop5x_xts_dec + +.Loop5x_dec_after: + add x2,x2,#0x50 + cbz x2,.Lxts_done + + add w6,w5,#2 + subs x2,x2,#0x30 + b.lo .Lxts_inner_dec_tail + + eor v0.16b,v6.16b,v27.16b + eor v1.16b,v8.16b,v28.16b + eor v24.16b,v29.16b,v9.16b + b .Lxts_outer_dec_tail + +.align 4 +.Lxts_dec_tail4x: + add x0,x0,#16 + tst x21,#0xf + eor v5.16b,v1.16b,v4.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v24.16b,v17.16b + st1 {v17.16b},[x1],#16 + eor v30.16b,v25.16b,v30.16b + eor v31.16b,v26.16b,v31.16b + st1 {v30.16b,v31.16b},[x1],#32 + + b.eq .Lxts_dec_abort + ld1 {v0.4s},[x0],#16 + b .Lxts_done +.align 4 +.Lxts_outer_dec_tail: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_outer_dec_tail + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + eor v17.16b,v9.16b,v7.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + + add x6,x6,#0x20 + add x0,x0,x6 // x0 is adjusted to the last data + + mov x7,x3 + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + st1 {v5.16b},[x1],#16 + st1 {v24.16b},[x1],#16 + + cmn x2,#0x30 + add x2,x2,#0x30 + b.eq .Lxts_done + sub x2,x2,#0x30 + orr v28.16b,v3.16b,v3.16b + orr v29.16b,v27.16b,v27.16b + nop + +.Lxts_inner_dec_tail: + // x2 == -0x10 means two blocks left. 
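+ // (x2 == -0x20 means one block is left; the cmn x2,#0x20 test further
+ // down routes that case to .Lxts_dec_one)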
+ cmn x2,#0x10
+ eor v1.16b,v28.16b,v6.16b
+ eor v24.16b,v29.16b,v8.16b
+ b.eq .Lxts_dec_tail_loop
+ eor v24.16b,v29.16b,v6.16b
+.Lxts_dec_tail_loop:
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Lxts_dec_tail_loop
+
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ aesd v1.16b,v20.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v20.16b
+ aesimc v24.16b,v24.16b
+ cmn x2,#0x20
+ aesd v1.16b,v21.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v21.16b
+ aesimc v24.16b,v24.16b
+ eor v5.16b,v6.16b,v7.16b
+ aesd v1.16b,v22.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v22.16b
+ aesimc v24.16b,v24.16b
+ eor v17.16b,v8.16b,v7.16b
+ aesd v1.16b,v23.16b
+ aesd v24.16b,v23.16b
+ b.eq .Lxts_dec_one
+ eor v5.16b,v5.16b,v1.16b
+ eor v17.16b,v17.16b,v24.16b
+ orr v6.16b,v9.16b,v9.16b
+ orr v8.16b,v10.16b,v10.16b
+ st1 {v5.16b},[x1],#16
+ st1 {v17.16b},[x1],#16
+ add x2,x2,#16
+ b .Lxts_done
+
+.Lxts_dec_one:
+ eor v5.16b,v5.16b,v24.16b
+ orr v6.16b,v8.16b,v8.16b
+ orr v8.16b,v9.16b,v9.16b
+ st1 {v5.16b},[x1],#16
+ add x2,x2,#32
+
+.Lxts_done:
+ tst x21,#0xf
+ b.eq .Lxts_dec_abort
+ // Processing the last two blocks with cipher stealing.
+ mov x7,x3
+ cbnz x2,.Lxts_dec_1st_done
+ ld1 {v0.4s},[x0],#16
+
+ // Decrypt the second-to-last ciphertext block to get the last plaintext block
+.Lxts_dec_1st_done:
+ eor v26.16b,v0.16b,v8.16b
+ ldr w6,[x3,#240]
+ ld1 {v0.4s},[x3],#16
+ sub w6,w6,#2
+ ld1 {v1.4s},[x3],#16
+.Loop_final_2nd_dec:
+ aesd v26.16b,v0.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v0.4s},[x3],#16 // load key schedule...
+ subs w6,w6,#2
+ aesd v26.16b,v1.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v1.4s},[x3],#16 // load key schedule...
+ b.gt .Loop_final_2nd_dec
+
+ aesd v26.16b,v0.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v0.4s},[x3]
+ aesd v26.16b,v1.16b
+ eor v26.16b,v26.16b,v0.16b
+ eor v26.16b,v26.16b,v8.16b
+ st1 {v26.16b},[x1]
+
+ mov x20,x0
+ add x13,x1,#16
+
+ // Merge the tailcnt trailing ciphertext bytes into the block just stored,
+ // moving the stolen plaintext bytes to the output tail, to form the
+ // composite block for the final decryption.
+.composite_dec_loop:
+ subs x21,x21,#1
+ ldrb w15,[x1,x21]
+ ldrb w14,[x20,x21]
+ strb w15,[x13,x21]
+ strb w14,[x1,x21]
+ b.gt .composite_dec_loop
+.Lxts_dec_load_done:
+ ld1 {v26.16b},[x1]
+ eor v26.16b,v26.16b,v6.16b
+
+ // Decrypt the composite block to get the second-to-last plaintext block
+ ldr w6,[x7,#240]
+ ld1 {v0.16b},[x7],#16
+ sub w6,w6,#2
+ ld1 {v1.16b},[x7],#16
+.Loop_final_dec:
+ aesd v26.16b,v0.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v0.4s},[x7],#16 // load key schedule...
+ subs w6,w6,#2
+ aesd v26.16b,v1.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v1.4s},[x7],#16 // load key schedule...
+ b.gt .Loop_final_dec + + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x7] + aesd v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v6.16b + st1 {v26.16b},[x1] + +.Lxts_dec_abort: + ldp x21,x22,[sp,#48] + ldp d8,d9,[sp,#32] + ldp d10,d11,[sp,#16] + ldp x19,x20,[sp],#64 + +.Lxts_dec_final_abort: + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/arm/aes-xts/aes-xts-armv8.txt b/arm/aes-xts/aes-xts-armv8.txt new file mode 100644 index 000000000..78438bffb --- /dev/null +++ b/arm/aes-xts/aes-xts-armv8.txt @@ -0,0 +1,1124 @@ +[ + 0xf100405f; (* arm_CMP X2 (rvalue (word 16)) *) + 0x540007e1; (* arm_BNE (word 252) *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7080; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7081; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x4c407000; (* arm_LDR Q0 X0 No_Offset *) + 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdfa87c; (* arm_LDP Q28 Q29 X3 (Postimmediate_Offset (word 32)) *) + 0x4e284b80; (* arm_AESE Q0 Q28 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdfa870; (* arm_LDP Q16 Q17 X3 (Postimmediate_Offset (word 32)) *) + 0x4e284ba0; (* arm_AESE Q0 Q29 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x710028c6; (* arm_SUBS W6 W6 (rvalue (word 10)) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdf7870; (* arm_LDR Q16 X3 (Postimmediate_Offset (word 16)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdf7871; (* arm_LDR Q17 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4cdfa872; (* arm_LDP Q18 Q19 X3 (Postimmediate_Offset (word 32)) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdfa874; (* arm_LDP Q20 Q21 X3 (Postimmediate_Offset (word 32)) *) + 0x4e284a40; (* arm_AESE Q0 Q18 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a60; (* arm_AESE Q0 Q19 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdfa876; (* arm_LDP Q22 Q23 X3 (Postimmediate_Offset (word 32)) *) + 0x4e284a80; (* arm_AESE Q0 Q20 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284aa0; (* arm_AESE Q0 Q21 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4c407867; (* arm_LDR Q7 X3 No_Offset *) + 0x4e284ac0; (* arm_AESE Q0 Q22 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284ae0; (* arm_AESE Q0 Q23 *) + 0x6e271c00; (* arm_EOR_VEC Q0 Q0 Q7 128 *) + 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *) + 0x4c007020; (* arm_STR Q0 X1 No_Offset *) + 0x140001e8; (* arm_B (word 1952) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xa9bc53f3; (* arm_STP X19 X20 SP 
(Preimmediate_Offset (iword (-- &64))) *) + 0xa9035bf5; (* arm_STP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d0227e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x92400c55; (* arm_AND X21 X2 (rvalue (word 15)) *) + 0x927cec42; (* arm_AND X2 X2 (rvalue (word 18446744073709551600)) *) + 0xf1004042; (* arm_SUBS X2 X2 (rvalue (word 16)) *) + 0xd2800208; (* arm_MOV X8 (rvalue (word 16)) *) + 0x54003b03; (* arm_BCC (word 1888) *) + 0x9a8803e8; (* arm_CSEL X8 XZR X8 Condition_EQ *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x9e6600c9; (* arm_FMOV_FtoI X9 Q6 0 *) + 0x9eae00ca; (* arm_FMOV_FtoI X10 Q6 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0xb940f065; (* arm_LDR W5 X3 (Immediate_Offset (word 240)) *) + 0x4cc87000; (* arm_LDR Q0 X0 (Postreg_Offset X8) *) + 0x4c40a870; (* arm_LDP Q16 Q17 X3 No_Offset *) + 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *) + 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *) + 0x4cdfa8f2; (* arm_LDP Q18 Q19 X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f4; (* arm_LDP Q20 Q21 X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f6; (* arm_LDP Q22 Q23 X7 (Postimmediate_Offset (word 32)) *) + 0x4c4078e7; (* arm_LDR Q7 X7 No_Offset *) + 0x510008a5; (* arm_SUB W5 W5 (rvalue (word 2)) *) + 0x91008067; (* arm_ADD X7 X3 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0xf1008042; (* arm_SUBS X2 X2 (rvalue (word 32)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x4ea01c03; (* arm_MOV_VEC Q3 Q0 128 *) + 0x4ea01c01; (* arm_MOV_VEC Q1 Q0 128 *) + 0x4ea01c1c; (* arm_MOV_VEC Q28 Q0 128 *) + 0x4eb81f1b; (* arm_MOV_VEC Q27 Q24 128 *) + 0x4eb81f1d; (* arm_MOV_VEC Q29 Q24 128 *) + 0x54002723; (* arm_BCC (word 1252) *) + 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *) + 0x6e281f18; (* arm_EOR_VEC Q24 Q24 Q8 128 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x4eb81f01; (* arm_MOV_VEC Q1 Q24 128 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0x4ea01c02; (* arm_MOV_VEC Q2 Q0 128 *) + 0x4ea11c23; (* arm_MOV_VEC Q3 Q1 128 *) + 0x6e291f1b; (* 
arm_EOR_VEC Q27 Q24 Q9 128 *) + 0x6e291f18; (* arm_EOR_VEC Q24 Q24 Q9 128 *) + 0xf100805f; (* arm_CMP X2 (rvalue (word 32)) *) + 0x54001b63; (* arm_BCC (word 876) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x4cdf7019; (* arm_LDR Q25 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701a; (* arm_LDR Q26 X0 (Postimmediate_Offset (word 16)) *) + 0x6e2a1f39; (* arm_EOR_VEC Q25 Q25 Q10 128 *) + 0x6e2b1f5a; (* arm_EOR_VEC Q26 Q26 Q11 128 *) + 0xd1008042; (* arm_SUB X2 X2 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a19; (* arm_AESE Q25 Q16 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a1a; (* arm_AESE Q26 Q16 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a39; (* arm_AESE Q25 Q17 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a3a; (* arm_AESE Q26 Q17 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffd2c; (* arm_BGT (word 2097060) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a19; (* arm_AESE Q25 Q16 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a1a; (* arm_AESE Q26 Q16 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0xf1014042; (* arm_SUBS X2 X2 (rvalue (word 80)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a39; (* arm_AESE Q25 Q17 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a3a; (* arm_AESE Q26 Q17 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x9a82c3e6; (* arm_CSEL X6 XZR X2 Condition_GT *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x4e284a40; (* arm_AESE Q0 Q18 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a41; (* arm_AESE Q1 Q18 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a58; (* arm_AESE Q24 Q18 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a59; (* arm_AESE Q25 Q18 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a5a; (* arm_AESE Q26 Q18 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0x91018046; (* arm_ADD X6 X2 (rvalue (word 96)) *) + 0x4e284a60; (* arm_AESE Q0 Q19 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a61; (* arm_AESE Q1 Q19 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a78; (* arm_AESE Q24 Q19 *) + 
0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a79; (* arm_AESE Q25 Q19 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a7a; (* arm_AESE Q26 Q19 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284a80; (* arm_AESE Q0 Q20 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a81; (* arm_AESE Q1 Q20 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a98; (* arm_AESE Q24 Q20 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a99; (* arm_AESE Q25 Q20 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a9a; (* arm_AESE Q26 Q20 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284aa0; (* arm_AESE Q0 Q21 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284aa1; (* arm_AESE Q1 Q21 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ab8; (* arm_AESE Q24 Q21 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ab9; (* arm_AESE Q25 Q21 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284aba; (* arm_AESE Q26 Q21 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284ac0; (* arm_AESE Q0 Q22 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284ac1; (* arm_AESE Q1 Q22 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ad8; (* arm_AESE Q24 Q22 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ad9; (* arm_AESE Q25 Q22 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284ada; (* arm_AESE Q26 Q22 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284ae0; (* arm_AESE Q0 Q23 *) + 0x4e284ae1; (* arm_AESE Q1 Q23 *) + 0x4e284af8; (* arm_AESE Q24 Q23 *) + 0x4e284af9; (* arm_AESE Q25 Q23 *) + 0x4e284afa; (* arm_AESE Q26 Q23 *) + 0x6e261ce4; (* arm_EOR_VEC Q4 Q7 Q6 128 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e281ce5; (* arm_EOR_VEC Q5 Q7 Q8 128 *) + 0x4cdf7002; (* arm_LDR Q2 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0x6e291cf1; (* arm_EOR_VEC Q17 Q7 Q9 128 *) + 0x4cdf7003; (* arm_LDR Q3 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x6e2a1cfe; (* arm_EOR_VEC Q30 Q7 Q10 128 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x6e2b1cff; (* arm_EOR_VEC Q31 Q7 Q11 128 *) + 0x4cdf701c; (* arm_LDR Q28 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701d; (* arm_LDR Q29 X0 (Postimmediate_Offset (word 16)) *) + 0xb4000586; (* arm_CBZ X6 (word 176) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) 
+ 0x6e261c40; (* arm_EOR_VEC Q0 Q2 Q6 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e281c61; (* arm_EOR_VEC Q1 Q3 Q8 128 *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e391fde; (* arm_EOR_VEC Q30 Q30 Q25 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e3a1fff; (* arm_EOR_VEC Q31 Q31 Q26 128 *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4c9fa024; (* arm_STP Q4 Q5 X1 (Postimmediate_Offset (word 32)) *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9fa03e; (* arm_STP Q30 Q31 X1 (Postimmediate_Offset (word 32)) *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54ffebe2; (* arm_BCS (word 2096508) *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x540001a1; (* arm_BNE (word 52) *) + 0x4eaa1d4b; (* arm_MOV_VEC Q11 Q10 128 *) + 0x4ea91d2a; (* arm_MOV_VEC Q10 Q9 128 *) + 0x4ea81d09; (* arm_MOV_VEC Q9 Q8 128 *) + 0x4ea61cc8; (* arm_MOV_VEC Q8 Q6 128 *) + 0x9e660169; (* arm_FMOV_FtoI X9 Q11 0 *) + 0x9eae016a; (* arm_FMOV_FtoI X10 Q11 1 *) + 0x6e221cc0; (* arm_EOR_VEC Q0 Q6 Q2 128 *) + 0x6e231d01; (* arm_EOR_VEC Q1 Q8 Q3 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x54ffea20; (* arm_BEQ (word 2096452) *) + 0x91014042; (* arm_ADD X2 X2 (rvalue (word 80)) *) + 0xb4001662; (* arm_CBZ X2 (word 716) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x54000c43; (* arm_BCC (word 392) *) + 0x6e3b1cc0; (* arm_EOR_VEC Q0 Q6 Q27 128 *) + 0x6e3c1d01; (* arm_EOR_VEC Q1 Q8 Q28 128 *) + 0x6e291fb8; (* arm_EOR_VEC Q24 Q29 Q9 128 *) + 0x14000010; (* arm_B (word 64) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0x91004000; (* arm_ADD X0 X0 (rvalue (word 16)) *) + 0x6e251c25; (* arm_EOR_VEC Q5 Q1 Q5 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x6e311f11; (* arm_EOR_VEC Q17 Q24 Q17 128 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x6e3e1f3e; (* arm_EOR_VEC Q30 Q25 Q30 128 *) + 0x6e3f1f5f; (* arm_EOR_VEC Q31 Q26 Q31 128 *) + 0x4c9fa03e; (* arm_STP Q30 Q31 X1 (Postimmediate_Offset (word 32)) *) + 0x140000a0; (* arm_B (word 640) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffe2c; (* arm_BGT (word 2097092) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271cc4; (* arm_EOR_VEC Q4 Q6 Q7 128 *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 
0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a80; (* arm_AESE Q0 Q20 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a81; (* arm_AESE Q1 Q20 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a98; (* arm_AESE Q24 Q20 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284aa0; (* arm_AESE Q0 Q21 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284aa1; (* arm_AESE Q1 Q21 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ab8; (* arm_AESE Q24 Q21 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ac0; (* arm_AESE Q0 Q22 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284ac1; (* arm_AESE Q1 Q22 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ad8; (* arm_AESE Q24 Q22 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ae0; (* arm_AESE Q0 Q23 *) + 0x4e284ae1; (* arm_AESE Q1 Q23 *) + 0x4e284af8; (* arm_AESE Q24 Q23 *) + 0x6e271d31; (* arm_EOR_VEC Q17 Q9 Q7 128 *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x9e660129; (* arm_FMOV_FtoI X9 Q9 0 *) + 0x9eae012a; (* arm_FMOV_FtoI X10 Q9 1 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e271d05; (* arm_EOR_VEC Q5 Q8 Q7 128 *) + 0x9a863046; (* arm_CSEL X6 X2 X6 Condition_CC *) + 0x910080c6; (* arm_ADD X6 X6 (rvalue (word 32)) *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e311f18; (* arm_EOR_VEC Q24 Q24 Q17 128 *) + 0x4cdfa8f0; (* arm_LDP Q16 Q17 X7 (Postimmediate_Offset (word 32)) *) + 0x4c9fa024; (* arm_STP Q4 Q5 X1 (Postimmediate_Offset (word 32)) *) + 0x4c9f7038; (* arm_STR Q24 X1 (Postimmediate_Offset (word 16)) *) + 0xb100c05f; (* arm_CMN X2 (rvalue (word 48)) *) + 0x54000a40; (* arm_BEQ (word 328) *) + 0x4ea31c7c; (* arm_MOV_VEC Q28 Q3 128 *) + 0x4ebb1f7d; (* arm_MOV_VEC Q29 Q27 128 *) + 0xd503201f; (* arm_NOP *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x6e261f81; (* arm_EOR_VEC Q1 Q28 Q6 128 *) + 0x6e281fb8; (* arm_EOR_VEC Q24 Q29 Q8 128 *) + 0x54000040; (* arm_BEQ (word 8) *) + 0x6e261fb8; (* arm_EOR_VEC Q24 Q29 Q6 128 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffeac; (* arm_BGT (word 2097108) *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a81; (* arm_AESE Q1 Q20 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a98; (* arm_AESE Q24 Q20 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0xb100805f; (* arm_CMN X2 (rvalue (word 32)) *) + 0x4e284aa1; (* arm_AESE Q1 Q21 *) + 
0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ab8; (* arm_AESE Q24 Q21 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271cc5; (* arm_EOR_VEC Q5 Q6 Q7 128 *) + 0x4e284ac1; (* arm_AESE Q1 Q22 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ad8; (* arm_AESE Q24 Q22 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271d11; (* arm_EOR_VEC Q17 Q8 Q7 128 *) + 0x4e284ae1; (* arm_AESE Q1 Q23 *) + 0x4e284af8; (* arm_AESE Q24 Q23 *) + 0x54000200; (* arm_BEQ (word 64) *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x4ea81d06; (* arm_MOV_VEC Q6 Q8 128 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x9e660109; (* arm_FMOV_FtoI X9 Q8 0 *) + 0x9eae010a; (* arm_FMOV_FtoI X10 Q8 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x14000015; (* arm_B (word 84) *) + 0x6e381ca5; (* arm_EOR_VEC Q5 Q5 Q24 128 *) + 0x4ea61cc6; (* arm_MOV_VEC Q6 Q6 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x9e6600c9; (* arm_FMOV_FtoI X9 Q6 0 *) + 0x9eae00ca; (* arm_FMOV_FtoI X10 Q6 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x14000008; (* arm_B (word 32) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x540003e0; (* arm_BEQ (word 124) *) + 0xaa0003f4; (* arm_MOV X20 X0 *) + 0xaa0103ed; (* arm_MOV X13 X1 *) + 0xd1004021; (* arm_SUB X1 X1 (rvalue (word 16)) *) + 0xf10006b5; (* arm_SUBS X21 X21 (rvalue (word 1)) *) + 0x3875682f; (* arm_LDRB W15 X1 (Register_Offset X21) *) + 0x38756a8e; (* arm_LDRB W14 X20 (Register_Offset X21) *) + 0x383569af; (* arm_STRB W15 X13 (Register_Offset X21) *) + 0x3835682e; (* arm_STRB W14 X1 (Register_Offset X21) *) + 0x54ffff6c; (* arm_BGT (word 2097132) *) + 0x4c40703a; (* arm_LDR Q26 X1 No_Offset *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdf7060; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7061; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x4e28481a; (* arm_AESE Q26 Q0 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf7860; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e28483a; (* arm_AESE Q26 Q1 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf7861; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e28481a; (* arm_AESE Q26 Q0 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4c407860; (* arm_LDR Q0 X3 No_Offset *) + 0x4e28483a; (* arm_AESE Q26 Q1 *) + 0x6e201f5a; (* arm_EOR_VEC Q26 Q26 Q0 128 *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0x4c00703a; (* arm_STR Q26 X1 No_Offset *) + 
0xa9435bf5; (* arm_LDP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d4227e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0xa8c453f3; (* arm_LDP X19 X20 SP (Postimmediate_Offset (iword (&64))) *) + 0xd65f03c0; (* arm_RET X30 *) + 0xf100405f; (* arm_CMP X2 (rvalue (word 16)) *) + 0x540007a1; (* arm_BNE (word 244) *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7080; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7081; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x4c407000; (* arm_LDR Q0 X0 No_Offset *) + 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdfa87c; (* arm_LDP Q28 Q29 X3 (Postimmediate_Offset (word 32)) *) + 0x4e285b80; (* arm_AESD Q0 Q28 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdfa870; (* arm_LDP Q16 Q17 X3 (Postimmediate_Offset (word 32)) *) + 0x4e285ba0; (* arm_AESD Q0 Q29 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x710028c6; (* arm_SUBS W6 W6 (rvalue (word 10)) *) + 0x54000120; (* arm_BEQ (word 36) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdf7870; (* arm_LDR Q16 X3 (Postimmediate_Offset (word 16)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdf7871; (* arm_LDR Q17 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4cdfa872; (* arm_LDP Q18 Q19 X3 (Postimmediate_Offset (word 32)) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdfa874; (* arm_LDP Q20 Q21 X3 (Postimmediate_Offset (word 32)) *) + 0x4e285a40; (* arm_AESD Q0 Q18 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a60; (* arm_AESD Q0 Q19 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdfa876; (* arm_LDP Q22 Q23 X3 (Postimmediate_Offset (word 32)) *) + 0x4e285a80; (* arm_AESD Q0 Q20 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285aa0; (* arm_AESD Q0 Q21 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4c407867; (* arm_LDR Q7 X3 No_Offset *) + 0x4e285ac0; (* arm_AESD Q0 Q22 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285ae0; (* arm_AESD Q0 Q23 *) + 0x6e271c00; (* arm_EOR_VEC Q0 Q0 Q7 128 *) + 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *) + 0x4c007020; (* arm_STR Q0 X1 No_Offset *) + 0x140001ff; (* arm_B (word 2044) *) + 0xa9bc53f3; (* arm_STP X19 X20 SP (Preimmediate_Offset (iword (-- &64))) *) + 0xa9035bf5; (* arm_STP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d0227e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x92400c55; (* arm_AND X21 X2 (rvalue (word 15)) *) + 0x927cec42; (* arm_AND X2 X2 (rvalue (word 
18446744073709551600)) *) + 0xf1004042; (* arm_SUBS X2 X2 (rvalue (word 16)) *) + 0xd2800208; (* arm_MOV X8 (rvalue (word 16)) *) + 0x54003e43; (* arm_BCC (word 1992) *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7080; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7081; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x9e6600c9; (* arm_FMOV_FtoI X9 Q6 0 *) + 0x9eae00ca; (* arm_FMOV_FtoI X10 Q6 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0xb940f065; (* arm_LDR W5 X3 (Immediate_Offset (word 240)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x4c40a870; (* arm_LDP Q16 Q17 X3 No_Offset *) + 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *) + 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *) + 0x510008a5; (* arm_SUB W5 W5 (rvalue (word 2)) *) + 0x4cdfa8f2; (* arm_LDP Q18 Q19 X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f4; (* arm_LDP Q20 Q21 X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f6; (* arm_LDP Q22 Q23 X7 (Postimmediate_Offset (word 32)) *) + 0x4c4078e7; (* arm_LDR Q7 X7 No_Offset *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x91008067; (* arm_ADD X7 X3 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x14000002; (* arm_B (word 8) *) + 0xd503201f; (* arm_NOP *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x540000c0; (* arm_BEQ (word 24) *) + 0xf1004042; (* arm_SUBS X2 X2 (rvalue (word 16)) *) + 0x9a8803e8; (* arm_CSEL X8 XZR X8 Condition_EQ *) + 0x4cdf7000; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) + 0x54003043; (* arm_BCC (word 1544) *) + 0xd1004000; (* arm_SUB X0 X0 (rvalue (word 16)) *) + 0x4cc87000; (* arm_LDR Q0 X0 (Postreg_Offset X8) *) + 0xf1008042; (* arm_SUBS X2 X2 (rvalue (word 32)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x4ea01c03; (* arm_MOV_VEC Q3 Q0 128 *) + 0x4ea01c01; (* arm_MOV_VEC Q1 Q0 128 *) + 0x4ea01c1c; (* arm_MOV_VEC Q28 Q0 128 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0x4eb81f1b; (* arm_MOV_VEC Q27 Q24 128 *) + 0x4eb81f1d; (* arm_MOV_VEC Q29 Q24 128 *) + 0x540027e3; (* arm_BCC (word 1276) *) + 
0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *) + 0x6e281f18; (* arm_EOR_VEC Q24 Q24 Q8 128 *) + 0x4eb81f01; (* arm_MOV_VEC Q1 Q24 128 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0x4ea01c02; (* arm_MOV_VEC Q2 Q0 128 *) + 0x4ea11c23; (* arm_MOV_VEC Q3 Q1 128 *) + 0x6e291f1b; (* arm_EOR_VEC Q27 Q24 Q9 128 *) + 0x6e291f18; (* arm_EOR_VEC Q24 Q24 Q9 128 *) + 0xf100805f; (* arm_CMP X2 (rvalue (word 32)) *) + 0x54001ac3; (* arm_BCC (word 856) *) + 0x4cdf7019; (* arm_LDR Q25 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701a; (* arm_LDR Q26 X0 (Postimmediate_Offset (word 16)) *) + 0x6e2a1f39; (* arm_EOR_VEC Q25 Q25 Q10 128 *) + 0x6e2b1f5a; (* arm_EOR_VEC Q26 Q26 Q11 128 *) + 0xd1008042; (* arm_SUB X2 X2 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x14000001; (* arm_B (word 4) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a19; (* arm_AESD Q25 Q16 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a1a; (* arm_AESD Q26 Q16 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a39; (* arm_AESD Q25 Q17 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a3a; (* arm_AESD Q26 Q17 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffd2c; (* arm_BGT (word 2097060) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a19; (* arm_AESD Q25 Q16 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a1a; (* arm_AESD Q26 Q16 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0xf1014042; (* arm_SUBS X2 X2 (rvalue (word 80)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a39; (* arm_AESD Q25 Q17 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a3a; (* arm_AESD Q26 Q17 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x9a82c3e6; (* arm_CSEL X6 XZR X2 Condition_GT *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x4e285a40; (* arm_AESD Q0 Q18 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a41; (* arm_AESD Q1 Q18 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a58; (* arm_AESD Q24 Q18 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a59; (* arm_AESD Q25 Q18 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a5a; (* arm_AESD Q26 Q18 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0x91018046; (* arm_ADD X6 X2 (rvalue (word 96)) *) + 0x4e285a60; (* arm_AESD Q0 Q19 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a61; (* arm_AESD Q1 Q19 *) + 
0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a78; (* arm_AESD Q24 Q19 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a79; (* arm_AESD Q25 Q19 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a7a; (* arm_AESD Q26 Q19 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4e285a80; (* arm_AESD Q0 Q20 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a81; (* arm_AESD Q1 Q20 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a98; (* arm_AESD Q24 Q20 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a99; (* arm_AESD Q25 Q20 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a9a; (* arm_AESD Q26 Q20 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4e285aa0; (* arm_AESD Q0 Q21 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285aa1; (* arm_AESD Q1 Q21 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ab8; (* arm_AESD Q24 Q21 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285ab9; (* arm_AESD Q25 Q21 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285aba; (* arm_AESD Q26 Q21 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4e285ac0; (* arm_AESD Q0 Q22 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285ac1; (* arm_AESD Q1 Q22 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ad8; (* arm_AESD Q24 Q22 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285ad9; (* arm_AESD Q25 Q22 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285ada; (* arm_AESD Q26 Q22 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x6e261ce4; (* arm_EOR_VEC Q4 Q7 Q6 128 *) + 0x4e285ae0; (* arm_AESD Q0 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e281ce5; (* arm_EOR_VEC Q5 Q7 Q8 128 *) + 0x4cdf7002; (* arm_LDR Q2 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285ae1; (* arm_AESD Q1 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0x6e291cf1; (* arm_EOR_VEC Q17 Q7 Q9 128 *) + 0x4cdf7003; (* arm_LDR Q3 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285af8; (* arm_AESD Q24 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x6e2a1cfe; (* arm_EOR_VEC Q30 Q7 Q10 128 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285af9; (* arm_AESD Q25 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x6e2b1cff; (* arm_EOR_VEC Q31 Q7 Q11 128 *) + 0x4cdf701c; (* arm_LDR Q28 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285afa; (* arm_AESD Q26 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701d; (* arm_LDR Q29 X0 (Postimmediate_Offset (word 
16)) *) + 0xb4000586; (* arm_CBZ X6 (word 176) *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e261c40; (* arm_EOR_VEC Q0 Q2 Q6 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e281c61; (* arm_EOR_VEC Q1 Q3 Q8 128 *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e391fde; (* arm_EOR_VEC Q30 Q30 Q25 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e3a1fff; (* arm_EOR_VEC Q31 Q31 Q26 128 *) + 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x4c9f703e; (* arm_STR Q30 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f703f; (* arm_STR Q31 X1 (Postimmediate_Offset (word 16)) *) + 0x54ffeba2; (* arm_BCS (word 2096500) *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x540001a1; (* arm_BNE (word 52) *) + 0x4eaa1d4b; (* arm_MOV_VEC Q11 Q10 128 *) + 0x4ea91d2a; (* arm_MOV_VEC Q10 Q9 128 *) + 0x4ea81d09; (* arm_MOV_VEC Q9 Q8 128 *) + 0x4ea61cc8; (* arm_MOV_VEC Q8 Q6 128 *) + 0x9e660169; (* arm_FMOV_FtoI X9 Q11 0 *) + 0x9eae016a; (* arm_FMOV_FtoI X10 Q11 1 *) + 0x6e221cc0; (* arm_EOR_VEC Q0 Q6 Q2 128 *) + 0x6e231d01; (* arm_EOR_VEC Q1 Q8 Q3 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x54ffe9e0; (* arm_BEQ (word 2096444) *) + 0x91014042; (* arm_ADD X2 X2 (rvalue (word 80)) *) + 0xb4001582; (* arm_CBZ X2 (word 688) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x54000e23; (* arm_BCC (word 452) *) + 0x6e3b1cc0; (* arm_EOR_VEC Q0 Q6 Q27 128 *) + 0x6e3c1d01; (* arm_EOR_VEC Q1 Q8 Q28 128 *) + 0x6e291fb8; (* arm_EOR_VEC Q24 Q29 Q9 128 *) + 0x1400000e; (* arm_B (word 56) *) + 0xd503201f; (* arm_NOP *) + 0x91004000; (* arm_ADD X0 X0 (rvalue (word 16)) *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x6e241c25; (* arm_EOR_VEC Q5 Q1 Q4 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x6e311f11; (* arm_EOR_VEC Q17 Q24 Q17 128 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x6e3e1f3e; (* arm_EOR_VEC Q30 Q25 Q30 128 *) + 0x6e3f1f5f; (* arm_EOR_VEC Q31 Q26 Q31 128 *) + 0x4c9fa03e; (* arm_STP Q30 Q31 X1 (Postimmediate_Offset (word 32)) *) + 0x54001a00; (* arm_BEQ (word 832) *) + 0x4cdf7800; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) + 0x14000098; (* arm_B (word 608) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffe2c; (* arm_BGT (word 2097092) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 
Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271cc4; (* arm_EOR_VEC Q4 Q6 Q7 128 *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x9e660129; (* arm_FMOV_FtoI X9 Q9 0 *) + 0x9eae012a; (* arm_FMOV_FtoI X10 Q9 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e271d05; (* arm_EOR_VEC Q5 Q8 Q7 128 *) + 0x9a863046; (* arm_CSEL X6 X2 X6 Condition_CC *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271d31; (* arm_EOR_VEC Q17 Q9 Q7 128 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0x910080c6; (* arm_ADD X6 X6 (rvalue (word 32)) *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x4e285a80; (* arm_AESD Q0 Q20 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a81; (* arm_AESD Q1 Q20 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a98; (* arm_AESD Q24 Q20 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285aa0; (* arm_AESD Q0 Q21 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285aa1; (* arm_AESD Q1 Q21 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ab8; (* arm_AESD Q24 Q21 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285ac0; (* arm_AESD Q0 Q22 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285ac1; (* arm_AESD Q1 Q22 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ad8; (* arm_AESD Q24 Q22 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285ae0; (* arm_AESD Q0 Q23 *) + 0x4e285ae1; (* arm_AESD Q1 Q23 *) + 0x4e285af8; (* arm_AESD Q24 Q23 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e311f18; (* arm_EOR_VEC Q24 Q24 Q17 128 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7038; (* arm_STR Q24 X1 (Postimmediate_Offset (word 16)) *) + 0xb100c05f; (* arm_CMN X2 (rvalue (word 48)) *) + 0x9100c042; (* arm_ADD X2 X2 (rvalue (word 48)) *) + 0x540007a0; (* arm_BEQ (word 244) *) + 0xd100c042; (* arm_SUB X2 X2 (rvalue (word 48)) *) + 0x4ea31c7c; (* arm_MOV_VEC Q28 Q3 128 *) + 0x4ebb1f7d; (* arm_MOV_VEC Q29 Q27 128 *) + 0xd503201f; (* arm_NOP *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x6e261f81; (* arm_EOR_VEC Q1 Q28 Q6 128 *) + 0x6e281fb8; (* arm_EOR_VEC Q24 Q29 Q8 128 *) + 0x54000040; (* arm_BEQ (word 8) 
*) + 0x6e261fb8; (* arm_EOR_VEC Q24 Q29 Q6 128 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffeac; (* arm_BGT (word 2097108) *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a81; (* arm_AESD Q1 Q20 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a98; (* arm_AESD Q24 Q20 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0xb100805f; (* arm_CMN X2 (rvalue (word 32)) *) + 0x4e285aa1; (* arm_AESD Q1 Q21 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ab8; (* arm_AESD Q24 Q21 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271cc5; (* arm_EOR_VEC Q5 Q6 Q7 128 *) + 0x4e285ac1; (* arm_AESD Q1 Q22 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ad8; (* arm_AESD Q24 Q22 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271d11; (* arm_EOR_VEC Q17 Q8 Q7 128 *) + 0x4e285ae1; (* arm_AESD Q1 Q23 *) + 0x4e285af8; (* arm_AESD Q24 Q23 *) + 0x54000120; (* arm_BEQ (word 36) *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x4ea91d26; (* arm_MOV_VEC Q6 Q9 128 *) + 0x4eaa1d48; (* arm_MOV_VEC Q8 Q10 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x91004042; (* arm_ADD X2 X2 (rvalue (word 16)) *) + 0x14000006; (* arm_B (word 24) *) + 0x6e381ca5; (* arm_EOR_VEC Q5 Q5 Q24 128 *) + 0x4ea81d06; (* arm_MOV_VEC Q6 Q8 128 *) + 0x4ea91d28; (* arm_MOV_VEC Q8 Q9 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x91008042; (* arm_ADD X2 X2 (rvalue (word 32)) *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x540006a0; (* arm_BEQ (word 212) *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0xb5000042; (* arm_CBNZ X2 (word 8) *) + 0x4cdf7800; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) + 0x6e281c1a; (* arm_EOR_VEC Q26 Q0 Q8 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdf7860; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7861; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf7860; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf7861; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4c407860; (* arm_LDR Q0 X3 No_Offset *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x6e201f5a; (* arm_EOR_VEC Q26 Q26 Q0 128 *) + 0x6e281f5a; (* arm_EOR_VEC Q26 Q26 Q8 128 *) + 0x4c00703a; (* arm_STR Q26 X1 No_Offset *) + 0xaa0003f4; (* arm_MOV X20 X0 *) + 0x9100402d; (* arm_ADD X13 X1 (rvalue (word 16)) *) + 0xf10006b5; (* arm_SUBS X21 X21 
(rvalue (word 1)) *) + 0x3875682f; (* arm_LDRB W15 X1 (Register_Offset X21) *) + 0x38756a8e; (* arm_LDRB W14 X20 (Register_Offset X21) *) + 0x383569af; (* arm_STRB W15 X13 (Register_Offset X21) *) + 0x3835682e; (* arm_STRB W14 X1 (Register_Offset X21) *) + 0x54ffff6c; (* arm_BGT (word 2097132) *) + 0x4c40703a; (* arm_LDR Q26 X1 No_Offset *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0xb940f0e6; (* arm_LDR W6 X7 (Immediate_Offset (word 240)) *) + 0x4cdf70e0; (* arm_LDR Q0 X7 (Postimmediate_Offset (word 16)) *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf70e1; (* arm_LDR Q1 X7 (Postimmediate_Offset (word 16)) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78e0; (* arm_LDR Q0 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78e1; (* arm_LDR Q1 X7 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4c4078e0; (* arm_LDR Q0 X7 No_Offset *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x6e201f5a; (* arm_EOR_VEC Q26 Q26 Q0 128 *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0x4c00703a; (* arm_STR Q26 X1 No_Offset *) + 0xa9435bf5; (* arm_LDP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d4227e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0xa8c453f3; (* arm_LDP X19 X20 SP (Postimmediate_Offset (iword (&64))) *) + 0xd65f03c0 (* arm_RET X30 *) +];; diff --git a/arm/proofs/aes-xts-armv8.ml b/arm/proofs/aes-xts-armv8.ml new file mode 100644 index 000000000..cbba0da7d --- /dev/null +++ b/arm/proofs/aes-xts-armv8.ml @@ -0,0 +1,22 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +use_file_raise_failure := true;; + +needs "arm/proofs/base.ml";; + +(* print_literal_from_elf "arm/aes-xts/aes-xts-armv8.o";; *) +save_literal_from_elf "arm/aes-xts/aes-xts-armv8.txt" "arm/aes-xts/aes-xts-armv8.o";; + +(* let aes_xts_armv8 = define_assert_from_elf "aes_xts_armv8" "arm/aes-xts/aes-xts-armv8.o" ..*) + +(* Missing instructions that were added in PR#211 +4c4070a6 10: 4c4070a6 ld1.16b { v6 }, [x5] +4cdfa87c 5c: 4cdfa87c ld1.4s { v28, v29 }, [x3], #32 +d503201f f8: d503201f nop +4cc87000 198: 4cc87000 ld1.16b { v0 }, [x0], x8 +4c40a870 19c: 4c40a870 ld1.4s { v16, v17 }, [x3] +3875682f 818: 3875682f ldrb w15, [x1, x21] +*) \ No newline at end of file
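+
+(* Illustrative sketch only, not part of the proof script: an OCaml model
+   of the GF(2^128) tweak doubling that the repeated EXTR/AND/EOR/FMOV
+   sequences in the machine code above implement.  The constant 0x87
+   encodes the XTS reduction polynomial x^128 + x^7 + x^2 + x + 1, and
+   the pair (lo, hi) mirrors the X9/X10 register pair that carries the
+   current 128-bit tweak between blocks. *)
+let xts_double_tweak (lo, hi) =
+  let open Int64 in
+  (* the carry out of bit 127 selects the 0x87 reduction mask,
+     matching AND W11, W19, W22, ASR #31 *)
+  let mask = if compare hi 0L < 0 then 0x87L else 0L in
+  (* hi' = (hi << 1) | (lo >> 63), matching EXTR X10, X10, X9, #63 *)
+  let hi' = logor (shift_left hi 1) (shift_right_logical lo 63) in
+  (* lo' = (lo << 1) ^ mask, matching EOR X9, X11, X9, LSL #1 *)
+  let lo' = logxor (shift_left lo 1) mask in
+  (lo', hi');;
+
+(* Iterating xts_double_tweak on the encrypted IV produces the per-block
+   tweaks that the code above keeps in Q6 and Q8-Q11. *)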
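+
+(* Also illustrative only: a model of the LDRB/STRB swap loop that
+   implements ciphertext stealing for a partial final block.  The names
+   are hypothetical: last_block is the most recent full output block,
+   input_tail holds the remaining tailcnt input bytes, and output_tail
+   is the destination for the stolen ciphertext bytes.  After the swap,
+   the patched last_block is passed through the cipher once more. *)
+let cts_swap ~last_block ~input_tail ~output_tail tailcnt =
+  for i = tailcnt - 1 downto 0 do
+    (* steal a ciphertext byte for the short output tail ... *)
+    Bytes.set output_tail i (Bytes.get last_block i);
+    (* ... and splice the input tail byte into the block to re-process *)
+    Bytes.set last_block i (Bytes.get input_tail i)
+  done;;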