Skip to content

Commit 6b598bb

Browse files
committed
Loongarch: add LSX SIMD immediate load modelling
1 parent bbf7e25 commit 6b598bb

File tree

2 files changed

+146
-26
lines changed

2 files changed

+146
-26
lines changed

Ghidra/Processors/Loongarch/data/languages/loongarch_main.sinc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,11 @@ vrD.H.imm18_3: vrD, val is vrD & imm18_3 [ val = 0x1000 + 32*vrD + 2*imm18_3; ]
474474
vrD.W.imm18_2: vrD, val is vrD & imm18_2 [ val = 0x1000 + 32*vrD + 4*imm18_2; ] { export *[register]:4 val; }
475475
vrD.D.imm18_1: vrD, val is vrD & imm18_1 [ val = 0x1000 + 32*vrD + 8*imm18_1; ] { export *[register]:8 val; }
476476

477+
# Element selectors over vector register vrJ, used by the vpickve2gr.* constructors.
# Each table exports one element of vrJ as a register-space varnode at
#   0x1000 + 32*vrJ + element_size*index
# (assumes the vector registers sit at register-space offset 0x1000 with a 32-byte
# stride, matching the vrD.* tables above -- TODO confirm against the register definitions).
# The index comes from the imm10_N field: the vpickve2gr.* encodings place it at bit 10
# ("u10:N"), while bits 18 and up are part of their fixed opcode. The imm18_N suffix in
# the table names is kept only so that existing references keep resolving.
vrJ.B.imm18_4: vrJ, val is vrJ & imm10_4 [ val = 0x1000 + 32*vrJ + imm10_4; ] { export *[register]:1 val; }
vrJ.H.imm18_3: vrJ, val is vrJ & imm10_3 [ val = 0x1000 + 32*vrJ + 2*imm10_3; ] { export *[register]:2 val; }
vrJ.W.imm18_2: vrJ, val is vrJ & imm10_2 [ val = 0x1000 + 32*vrJ + 4*imm10_2; ] { export *[register]:4 val; }
vrJ.D.imm18_1: vrJ, val is vrJ & imm10_1 [ val = 0x1000 + 32*vrJ + 8*imm10_1; ] { export *[register]:8 val; }
481+
477482
# general pcodeops
478483
define pcodeop break;
479484
define pcodeop cpucfg;
@@ -581,3 +586,11 @@ macro tzcount64(input, count) {
581586
count = count - 1 * zext((v & 0x5555555555555555) != 0);
582587
}
583588

589+
# select_value(cond, a, b, value): value = a when cond is non-zero, otherwise value = b.
# The "otherwise" result is stored first and only overwritten when cond is non-zero,
# so neither path can fall through into the other assignment.
macro select_value(cond, a, b, value) {
    value = b;
    if (cond == 0) goto <done>;
    value = a;
    <done>
}

Ghidra/Processors/Loongarch/data/languages/lsx.sinc

Lines changed: 133 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4565,7 +4565,14 @@ define pcodeop vexth.h.b;
45654565
#lsx.txt vexth.h.b mask=0x729ee000
45664566
#0x729ee000 0xfffffc00 v0:5,v5:5 ['vreg0_5_s0', 'vreg5_5_s0']
45674567
# vexth.h.b: each of the eight bytes in the upper 64 bits of vrJ (bit offsets 64..120)
# is sign-extended into one 16-bit lane of vrD, lowest byte to lowest lane.
# The statement following the "4568-" marker is the old opaque-pcodeop body that this
# commit removes; the statements following the "+" markers are its replacement.
# Statement order matters when vrD and vrJ are the same register: every source byte is
# read no later than the statement that overwrites it.
:vexth.h.b vrD, vrJ is op10_31=0x1ca7b8 & vrD & vrJ {
4568-
vrD = vexth.h.b(vrD, vrJ);
4568+
vrD[0,16] = sext(vrJ[64,8]);
4569+
vrD[16,16] = sext(vrJ[72,8]);
4570+
vrD[32,16] = sext(vrJ[80,8]);
4571+
vrD[48,16] = sext(vrJ[88,8]);
4572+
vrD[64,16] = sext(vrJ[96,8]);
4573+
vrD[80,16] = sext(vrJ[104,8]);
4574+
vrD[96,16] = sext(vrJ[112,8]);
4575+
vrD[112,16] = sext(vrJ[120,8]);
45694576
}
45704577

45714578
define pcodeop vexth.w.h;
@@ -4784,36 +4791,33 @@ define pcodeop vinsgr2vr.d;
47844791
vrD = vinsgr2vr.d(vrD, RJsrc, imm10_1:$(REGSIZE));
47854792
}
47864793

4787-
define pcodeop vpickve2gr.b;
4788-
4789-
#lsx.txt vpickve2gr.b mask=0x72ef8000
4794+
#lsx.txt vpickve2gr.b mask=0x72ef8000
47904795
#0x72ef8000 0xffffc000 r0:5,v5:5,u10:4 ['reg0_5_s0', 'vreg5_5_s0', 'imm10_4_s0']
4791-
:vpickve2gr.b RD, vrJ, imm10_4 is op14_31=0x1cbbe & RD & vrJ & imm10_4 {
4792-
RD = vpickve2gr.b(RD, vrJ, imm10_4:$(REGSIZE));
4796+
# vpickve2gr.b: RD receives the sign-extended 1-byte element of vrJ exported by the
# vrJ.B.imm18_4 subtable.
# NOTE(review): the subtable name refers to imm18_4, while the encoding comment lists the
# element index as u10:4 -- confirm the subtable takes the index from bits 10..13.
:vpickve2gr.b RD, vrJ.B.imm18_4 is op14_31=0x1cbbe & RD & vrJ.B.imm18_4 & vrJ & imm10_4 {
4797+
RD = sext(vrJ.B.imm18_4);
47934798
}
47944799

4795-
define pcodeop vpickve2gr.h;
4796-
4797-
#lsx.txt vpickve2gr.h mask=0x72efc000
4800+
#lsx.txt vpickve2gr.h mask=0x72efc000
47984801
#0x72efc000 0xffffe000 r0:5,v5:5,u10:3 ['reg0_5_s0', 'vreg5_5_s0', 'imm10_3_s0']
4799-
:vpickve2gr.h RD, vrJ, imm10_3 is op13_31=0x3977e & RD & vrJ & imm10_3 {
4800-
RD = vpickve2gr.h(RD, vrJ, imm10_3:$(REGSIZE));
4802+
# vpickve2gr.h: RD receives the sign-extended 2-byte element of vrJ exported by the
# vrJ.H.imm18_3 subtable.
# NOTE(review): the subtable name refers to imm18_3, while the encoding comment lists the
# element index as u10:3 -- confirm the subtable takes the index from bits 10..12.
:vpickve2gr.h RD, vrJ.H.imm18_3 is op13_31=0x3977e & RD & vrJ.H.imm18_3 & vrJ & imm10_3 {
4803+
RD = sext(vrJ.H.imm18_3);
48014804
}
48024805

4803-
define pcodeop vpickve2gr.w;
4804-
4805-
#lsx.txt vpickve2gr.w mask=0x72efe000
4806+
#lsx.txt vpickve2gr.w mask=0x72efe000
48064807
#0x72efe000 0xfffff000 r0:5,v5:5,u10:2 ['reg0_5_s0', 'vreg5_5_s0', 'imm10_2_s0']
4807-
:vpickve2gr.w RD, vrJ, imm10_2 is op12_31=0x72efe & RD & vrJ & imm10_2 {
4808-
RD = vpickve2gr.w(RD, vrJ, imm10_2:$(REGSIZE));
4808+
# vpickve2gr.w: RD receives the sign-extended 4-byte element of vrJ exported by the
# vrJ.W.imm18_2 subtable.
# NOTE(review): the subtable name refers to imm18_2, while the encoding comment lists the
# element index as u10:2 -- confirm the subtable takes the index from bits 10..11.
# NOTE(review): unlike vpickve2gr.d there is no LA64 guard here; if RD is only 4 bytes on
# a 32-bit build, this sext would have equal input and output sizes -- confirm.
:vpickve2gr.w RD, vrJ.W.imm18_2 is op12_31=0x72efe & RD & vrJ.W.imm18_2 & vrJ & imm10_2 {
4809+
RD = sext(vrJ.W.imm18_2);
48094810
}
48104811

48114812
define pcodeop vpickve2gr.d;
4812-
4813-
#lsx.txt vpickve2gr.d mask=0x72eff000
4813+
#lsx.txt vpickve2gr.d mask=0x72eff000
48144814
#0x72eff000 0xfffff800 r0:5,v5:5,u10:1 ['reg0_5_s0', 'vreg5_5_s0', 'imm10_1_s0']
4815-
:vpickve2gr.d RD, vrJ, imm10_1 is op11_31=0xe5dfe & RD & vrJ & imm10_1 {
4815+
# vpickve2gr.d: move the 8-byte element of vrJ exported by the vrJ.D.imm18_1 subtable
# into RD.
# When LA64 is defined, the element is copied directly: RD is assumed to be 8 bytes
# there (TODO confirm), the same size as the exported element, and a p-code sign
# extension requires an output strictly larger than its input.
# Otherwise the instruction stays modelled by the opaque vpickve2gr.d pcodeop, which
# is why vrJ and imm10_1 remain in the pattern.
:vpickve2gr.d RD, vrJ.D.imm18_1 is op11_31=0xe5dfe & RD & vrJ.D.imm18_1 & vrJ & imm10_1 {
@ifdef LA64
    RD = vrJ.D.imm18_1;
@else
    RD = vpickve2gr.d(RD, vrJ, imm10_1:$(REGSIZE));
@endif
}
48184822

48194823
define pcodeop vpickve2gr.bu;
@@ -5688,19 +5692,122 @@ define pcodeop vnori.b;
56885692
vrD = vnori.b(vrD, vrJ, imm10_8:$(REGSIZE));
56895693
}
56905694

5691-
define pcodeop vldi;
5692-
56935695
#lsx.txt vldi mask=0x73e00000
56945696
#0x73e00000 0xfffc0000 v0:5, s5:13 ['vreg0_5_s0', 'simm5_13_s0']
56955697
# vldi vrD, imm13: expand the 13-bit immediate into a lane pattern and replicate it
# across all 128 bits of vrD.
#
#   imm[12] == 0 : imm[11:10] selects the lane width
#       0 -> bytes  of imm[7:0]
#       1 -> halves of sign-extended imm[9:0]
#       2 -> words  of sign-extended imm[9:0]
#       3 -> dwords of sign-extended imm[9:0]
#   imm[12] == 1 : imm[12:8] - 17 (tmp10) selects an expanded form of imm[7:0],
#       dispatched from <block_0>; values without an explicit case fall back to a
#       plain word broadcast of imm[7:0].
#
# Shared tails: <block_10> stores halves, <block_12> stores words, <block_14> stores dwords.
:vldi vrD, simm5_13 is op18_31=0x1cf8 & vrD & simm5_13 {
    local tmp2:8 = simm5_13:8;
    local tmp4:8 = tmp2;          # untouched copy of the immediate (tmp2 is reused below)
    local tmp5:8 = 0;             # lane payload
    local tmp6:8 = 0;
    local tmp7:8 = 0;
    local tmp8:8 = 0;
    vrD[0,64] = 0x0;
    vrD[64,64] = 0x0;
    # Default payload: imm[9:0] sign-extended to 64 bits.
    tmp5 = ((tmp4 << 0x36) s>> 0x36);
    local tmp9:8 = (tmp2 >> 10) & 7;
    if (tmp9 == 0) goto <block_8>;
    if (tmp9 == 1) goto <block_10>;
    if (tmp9 == 2) goto <block_12>;
    if (tmp9 == 3) goto <block_14>;
    # imm[12] is set: decode the expanded forms.
    tmp6 = (tmp2 << 24);          # imm[7:6] moved to bits 31:30 (float sign / exponent MSB)
    tmp7 = tmp2 & 63;             # imm[5:0]
    tmp5 = tmp2 & 255;            # imm[7:0]
    tmp8 = (tmp2 >> 6) & 1;       # imm[6]
    local tmp10:8 = ((tmp2 >> 8) & 31) - 17;
    if (tmp10 == 0) goto <block_11>;
    if (tmp10 == 1) goto <block_1>;
    if (tmp10 == 2) goto <block_2>;
    if (tmp10 == 3) goto <block_10>;
    if (tmp10 == 4) goto <block_9>;
    if (tmp10 == 5) goto <block_3>;
    if (tmp10 == 6) goto <block_4>;
    if (tmp10 == 7) goto <block_8>;
    if (tmp10 == 8) goto <block_13>;
    if (tmp10 == 9) goto <block_5>;
    if (tmp10 == 10) goto <block_6>;
    if (tmp10 == 11) goto <block_7>;
    goto <block_12>;
  <block_1>
    # word lanes: imm[7:0] << 16
    tmp5 = tmp5 << 0x10;
    goto <block_12>;
  <block_2>
    # word lanes: imm[7:0] << 24
    tmp5 = tmp5 << 0x18;
    goto <block_12>;
  <block_3>
    # word lanes: (imm[7:0] << 8) | 0xff
    tmp5 = (tmp5 << 0x8) | 0xff;
    goto <block_12>;
  <block_4>
    # word lanes: (imm[7:0] << 16) | 0xffff
    tmp5 = (tmp5 << 0x10) | 0xffff;
    goto <block_12>;
  <block_5>
    # word lanes: 32-bit pattern {imm[7], ~imm[6], imm[6] x5, imm[5:0], 19 zero bits}
    tmp5 = ((tmp8 * 0x3e000000) | ((tmp6 & 0xc0000000) ^ 0x40000000)) | (tmp7 << 0x13);
    goto <block_12>;
  <block_6>
    # dword lanes: the same 32-bit pattern in the low word, upper word zero
    tmp5 = ((tmp8 * 0x3e000000) | ((tmp6 & 0xc0000000) ^ 0x40000000)) | (tmp7 << 0x13);
    goto <block_14>;
  <block_7>
    # dword lanes: 64-bit pattern {imm[7], ~imm[6], imm[6] x8, imm[5:0], 48 zero bits}
    tmp5 = (((tmp8 * 0x3fc0000000000000) | ((tmp4 << 0x38) & 0xc000000000000000)) | (tmp7 << 0x30)) ^ 0x4000000000000000;
    goto <block_14>;
  <block_8>
    # byte lanes: imm[7:0] replicated into every byte
    tmp5 = (tmp4 & 0xff) * 0x101010101010101;
    vrD[64,64] = tmp5;
    vrD[0,64] = tmp5;
    goto <end>;
  <block_9>
    # half lanes: imm[7:0] << 8, then fall into the half-lane store
    tmp5 = (tmp5 << 0x8);
  <block_10>
    tmp2 = tmp5;
    vrD[112,16] = tmp2:2;
    vrD[96,16] = tmp2:2;
    vrD[80,16] = tmp2:2;
    vrD[64,16] = tmp2:2;
    vrD[48,16] = tmp2:2;
    vrD[32,16] = tmp2:2;
    vrD[16,16] = tmp2:2;
    vrD[0,16] = tmp2:2;
    goto <end>;
  <block_11>
    # word lanes: imm[7:0] << 8, then fall into the word-lane store
    tmp5 = (tmp5 << 0x8);
  <block_12>
    tmp2 = tmp5;
    vrD[96,32] = tmp2:4;
    vrD[64,32] = tmp2:4;
    vrD[32,32] = tmp2:4;
    vrD[0,32] = tmp2:4;
    goto <end>;
  <block_13>
    # dword lanes: bit k of imm[7:0] expands to byte k, 0xff when set and 0x00 when clear.
    # Every temporary here is 8 bytes wide, so each bit is isolated explicitly rather than
    # replicated with shift tricks that only hold in 32-bit arithmetic.
    tmp5 = ((tmp2 & 1) * 0xff)
         | (((tmp2 >> 1) & 1) * 0xff00)
         | (((tmp2 >> 2) & 1) * 0xff0000)
         | (((tmp2 >> 3) & 1) * 0xff000000)
         | (((tmp2 >> 4) & 1) * 0xff00000000)
         | (((tmp2 >> 5) & 1) * 0xff0000000000)
         | (((tmp2 >> 6) & 1) * 0xff000000000000)
         | (((tmp2 >> 7) & 1) * 0xff00000000000000);
  <block_14>
    vrD[64,64] = tmp5;
    vrD[0,64] = tmp5;
  <end>
}
56985795

5699-
define pcodeop vpermi.w;
5700-
57015796
#lsx.txt vpermi.w mask=0x73e40000
57025797
#0x73e40000 0xfffc0000 v0:5,v5:5,u10:8 ['vreg0_5_s0', 'vreg5_5_s0', 'imm10_8_s0']
5703-
:vpermi.w vrD, vrJ, imm10_8 is op18_31=0x1cf9 & vrD & vrJ & imm10_8 {
5704-
vrD = vpermi.w(vrD, vrJ, imm10_8:$(REGSIZE));
5798+
# vpermi.w: rebuild vrD from four 32-bit words selected by the 2-bit fields of imm10_8.
#   result word 0 <- vrJ word imm[1:0]
#   result word 1 <- vrJ word imm[3:2]
#   result word 2 <- vrD word imm[5:4]
#   result word 3 <- vrD word imm[7:6]
# vrDN / vrJN are register-space offsets computed in the disassembly action as
# 0x1000 + 32*index; each selected word is loaded through *[register] at that offset
# plus 4*selector. All four loads are done before vrD is written, so the vrD words
# read here are the values from before the instruction.
# NOTE(review): vrDN and vrJN are listed in the display section, so they presumably show
# up as extra operands in the disassembly text -- confirm that this is intended.
:vpermi.w vrD, vrJ, imm10_8, vrDN, vrJN
5799+
is op18_31=0x1cf9 & vrD & vrJ & imm10_8 [ vrDN = 0x1000 + 32*vrD; vrJN = 0x1000 + 32*vrJ; ] {
5800+
local tmp1_idx:4 = imm10_8 & 3;
5801+
local tmp2_idx:4 = (imm10_8 >> 2) & 3;
5802+
local tmp3_idx:4 = (imm10_8 >> 4) & 3;
5803+
local tmp4_idx:4 = (imm10_8 >> 6) & 3;
5804+
local tmp1:4 = *[register]:4 (vrJN + 4*tmp1_idx);
5805+
local tmp2:4 = *[register]:4 (vrJN + 4*tmp2_idx);
5806+
local tmp3:4 = *[register]:4 (vrDN + 4*tmp3_idx);
5807+
local tmp4:4 = *[register]:4 (vrDN + 4*tmp4_idx);
5808+
vrD[0,32] = tmp1;
5809+
vrD[32,32] = tmp2;
5810+
vrD[64,32] = tmp3;
5811+
vrD[96,32] = tmp4;
57055812
}
57065813

0 commit comments

Comments
 (0)