Skip to content

Commit 7c2399a

Browse files
committed
x64: convert SSE multiplications
1 parent 0747529 commit 7c2399a

File tree

3 files changed

+28 / -39 lines changed

3 files changed

+28
-39
lines changed

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 11 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -3587,59 +3587,52 @@
35873587

35883588
;; Helper for creating `pmullw` instructions.
35893589
(decl x64_pmullw (Xmm XmmMem) Xmm)
3590-
(rule 0 (x64_pmullw src1 src2)
3591-
(xmm_rm_r (SseOpcode.Pmullw) src1 src2))
35923590
(rule 1 (x64_pmullw src1 src2)
35933591
(if-let true (use_avx))
35943592
(xmm_rmir_vex (AvxOpcode.Vpmullw) src1 src2))
3593+
(rule 0 (x64_pmullw src1 src2) (x64_pmullw_a src1 src2))
35953594

35963595
;; Helper for creating `pmulld` instructions.
35973596
(decl x64_pmulld (Xmm XmmMem) Xmm)
3598-
(rule 0 (x64_pmulld src1 src2)
3599-
(xmm_rm_r (SseOpcode.Pmulld) src1 src2))
36003597
(rule 1 (x64_pmulld src1 src2)
36013598
(if-let true (use_avx))
36023599
(xmm_rmir_vex (AvxOpcode.Vpmulld) src1 src2))
3600+
(rule 0 (x64_pmulld src1 src2) (x64_pmulld_a src1 src2))
36033601

36043602
;; Helper for creating `pmulhw` instructions.
36053603
(decl x64_pmulhw (Xmm XmmMem) Xmm)
3606-
(rule 0 (x64_pmulhw src1 src2)
3607-
(xmm_rm_r (SseOpcode.Pmulhw) src1 src2))
36083604
(rule 1 (x64_pmulhw src1 src2)
36093605
(if-let true (use_avx))
36103606
(xmm_rmir_vex (AvxOpcode.Vpmulhw) src1 src2))
3607+
(rule 0 (x64_pmulhw src1 src2) (x64_pmulhw_a src1 src2))
36113608

36123609
;; Helper for creating `pmulhrsw` instructions.
36133610
(decl x64_pmulhrsw (Xmm XmmMem) Xmm)
3614-
(rule 0 (x64_pmulhrsw src1 src2)
3615-
(xmm_rm_r (SseOpcode.Pmulhrsw) src1 src2))
36163611
(rule 1 (x64_pmulhrsw src1 src2)
36173612
(if-let true (use_avx))
36183613
(xmm_rmir_vex (AvxOpcode.Vpmulhrsw) src1 src2))
3614+
(rule 0 (x64_pmulhrsw src1 src2) (x64_pmulhrsw_a src1 src2))
36193615

36203616
;; Helper for creating `pmulhuw` instructions.
36213617
(decl x64_pmulhuw (Xmm XmmMem) Xmm)
3622-
(rule 0 (x64_pmulhuw src1 src2)
3623-
(xmm_rm_r (SseOpcode.Pmulhuw) src1 src2))
36243618
(rule 1 (x64_pmulhuw src1 src2)
36253619
(if-let true (use_avx))
36263620
(xmm_rmir_vex (AvxOpcode.Vpmulhuw) src1 src2))
3621+
(rule 0 (x64_pmulhuw src1 src2) (x64_pmulhuw_a src1 src2))
36273622

36283623
;; Helper for creating `pmuldq` instructions.
36293624
(decl x64_pmuldq (Xmm XmmMem) Xmm)
3630-
(rule 0 (x64_pmuldq src1 src2)
3631-
(xmm_rm_r (SseOpcode.Pmuldq) src1 src2))
36323625
(rule 1 (x64_pmuldq src1 src2)
36333626
(if-let true (use_avx))
36343627
(xmm_rmir_vex (AvxOpcode.Vpmuldq) src1 src2))
3628+
(rule 0 (x64_pmuldq src1 src2) (x64_pmuldq_a src1 src2))
36353629

36363630
;; Helper for creating `pmuludq` instructions.
36373631
(decl x64_pmuludq (Xmm XmmMem) Xmm)
3638-
(rule 0 (x64_pmuludq src1 src2)
3639-
(xmm_rm_r (SseOpcode.Pmuludq) src1 src2))
36403632
(rule 1 (x64_pmuludq src1 src2)
36413633
(if-let true (use_avx))
36423634
(xmm_rmir_vex (AvxOpcode.Vpmuludq) src1 src2))
3635+
(rule 0 (x64_pmuludq src1 src2) (x64_pmuludq_a src1 src2))
36433636

36443637
;; Helper for creating `punpckhwd` instructions.
36453638
(decl x64_punpckhwd (Xmm XmmMem) Xmm)
@@ -3792,35 +3785,31 @@
37923785

37933786
;; Helper for creating `mulss` instructions.
37943787
(decl x64_mulss (Xmm XmmMem) Xmm)
3795-
(rule (x64_mulss src1 src2)
3796-
(xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2))
37973788
(rule 1 (x64_mulss src1 src2)
37983789
(if-let true (use_avx))
37993790
(xmm_rmir_vex (AvxOpcode.Vmulss) src1 src2))
3791+
(rule 0 (x64_mulss src1 src2) (x64_mulss_a src1 src2))
38003792

38013793
;; Helper for creating `mulsd` instructions.
38023794
(decl x64_mulsd (Xmm XmmMem) Xmm)
3803-
(rule (x64_mulsd src1 src2)
3804-
(xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2))
38053795
(rule 1 (x64_mulsd src1 src2)
38063796
(if-let true (use_avx))
38073797
(xmm_rmir_vex (AvxOpcode.Vmulsd) src1 src2))
3798+
(rule 0 (x64_mulsd src1 src2) (x64_mulsd_a src1 src2))
38083799

38093800
;; Helper for creating `mulps` instructions.
38103801
(decl x64_mulps (Xmm XmmMem) Xmm)
3811-
(rule 0 (x64_mulps src1 src2)
3812-
(xmm_rm_r (SseOpcode.Mulps) src1 src2))
38133802
(rule 1 (x64_mulps src1 src2)
38143803
(if-let true (use_avx))
38153804
(xmm_rmir_vex (AvxOpcode.Vmulps) src1 src2))
3805+
(rule 0 (x64_mulps src1 src2) (x64_mulps_a src1 src2))
38163806

38173807
;; Helper for creating `mulpd` instructions.
38183808
(decl x64_mulpd (Xmm XmmMem) Xmm)
3819-
(rule (x64_mulpd src1 src2)
3820-
(xmm_rm_r (SseOpcode.Mulpd) src1 src2))
38213809
(rule 1 (x64_mulpd src1 src2)
38223810
(if-let true (use_avx))
38233811
(xmm_rmir_vex (AvxOpcode.Vmulpd) src1 src2))
3812+
(rule 0 (x64_mulpd src1 src2) (x64_mulpd_a src1 src2))
38243813

38253814
;; Helper for creating `divss` instructions.
38263815
(decl x64_divss (Xmm XmmMem) Xmm)

cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ block0(v0: i8x16, v1: i8x16):
1717
; pmovsxbw %xmm0, %xmm0
1818
; palignr $8, %xmm1, %xmm1, %xmm1
1919
; pmovsxbw %xmm1, %xmm1
20-
; pmullw %xmm0, %xmm1, %xmm0
20+
; pmullw %xmm1, %xmm0
2121
; movq %rbp, %rsp
2222
; popq %rbp
2323
; ret
@@ -49,8 +49,8 @@ block0(v0: i16x8, v1: i16x8):
4949
; movq %rsp, %rbp
5050
; block0:
5151
; movdqa %xmm0, %xmm5
52-
; pmullw %xmm5, %xmm1, %xmm5
53-
; pmulhw %xmm0, %xmm1, %xmm0
52+
; pmullw %xmm1, %xmm5
53+
; pmulhw %xmm1, %xmm0
5454
; movdqa %xmm0, %xmm2
5555
; movdqa %xmm5, %xmm0
5656
; punpckhwd %xmm0, %xmm2, %xmm0
@@ -87,7 +87,7 @@ block0(v0: i32x4, v1: i32x4):
8787
; block0:
8888
; pshufd $250, %xmm0, %xmm0
8989
; pshufd $250, %xmm1, %xmm5
90-
; pmuldq %xmm0, %xmm5, %xmm0
90+
; pmuldq %xmm5, %xmm0
9191
; movq %rbp, %rsp
9292
; popq %rbp
9393
; ret
@@ -118,7 +118,7 @@ block0(v0: i8x16, v1: i8x16):
118118
; block0:
119119
; pmovsxbw %xmm0, %xmm0
120120
; pmovsxbw %xmm1, %xmm5
121-
; pmullw %xmm0, %xmm5, %xmm0
121+
; pmullw %xmm5, %xmm0
122122
; movq %rbp, %rsp
123123
; popq %rbp
124124
; ret
@@ -148,8 +148,8 @@ block0(v0: i16x8, v1: i16x8):
148148
; movq %rsp, %rbp
149149
; block0:
150150
; movdqa %xmm0, %xmm5
151-
; pmullw %xmm5, %xmm1, %xmm5
152-
; pmulhw %xmm0, %xmm1, %xmm0
151+
; pmullw %xmm1, %xmm5
152+
; pmulhw %xmm1, %xmm0
153153
; movdqa %xmm0, %xmm2
154154
; movdqa %xmm5, %xmm0
155155
; punpcklwd %xmm0, %xmm2, %xmm0
@@ -186,7 +186,7 @@ block0(v0: i32x4, v1: i32x4):
186186
; block0:
187187
; pshufd $80, %xmm0, %xmm0
188188
; pshufd $80, %xmm1, %xmm5
189-
; pmuldq %xmm0, %xmm5, %xmm0
189+
; pmuldq %xmm5, %xmm0
190190
; movq %rbp, %rsp
191191
; popq %rbp
192192
; ret
@@ -221,7 +221,7 @@ block0(v0: i8x16, v1: i8x16):
221221
; uninit %xmm2
222222
; pxor %xmm2, %xmm2
223223
; punpckhbw %xmm1, %xmm2, %xmm1
224-
; pmullw %xmm0, %xmm1, %xmm0
224+
; pmullw %xmm1, %xmm0
225225
; movq %rbp, %rsp
226226
; popq %rbp
227227
; ret
@@ -253,8 +253,8 @@ block0(v0: i16x8, v1: i16x8):
253253
; movq %rsp, %rbp
254254
; block0:
255255
; movdqa %xmm0, %xmm5
256-
; pmullw %xmm5, %xmm1, %xmm5
257-
; pmulhuw %xmm0, %xmm1, %xmm0
256+
; pmullw %xmm1, %xmm5
257+
; pmulhuw %xmm1, %xmm0
258258
; movdqa %xmm0, %xmm2
259259
; movdqa %xmm5, %xmm0
260260
; punpckhwd %xmm0, %xmm2, %xmm0
@@ -291,7 +291,7 @@ block0(v0: i32x4, v1: i32x4):
291291
; block0:
292292
; pshufd $250, %xmm0, %xmm0
293293
; pshufd $250, %xmm1, %xmm5
294-
; pmuludq %xmm0, %xmm5, %xmm0
294+
; pmuludq %xmm5, %xmm0
295295
; movq %rbp, %rsp
296296
; popq %rbp
297297
; ret
@@ -322,7 +322,7 @@ block0(v0: i8x16, v1: i8x16):
322322
; block0:
323323
; pmovzxbw %xmm0, %xmm0
324324
; pmovzxbw %xmm1, %xmm5
325-
; pmullw %xmm0, %xmm5, %xmm0
325+
; pmullw %xmm5, %xmm0
326326
; movq %rbp, %rsp
327327
; popq %rbp
328328
; ret
@@ -352,8 +352,8 @@ block0(v0: i16x8, v1: i16x8):
352352
; movq %rsp, %rbp
353353
; block0:
354354
; movdqa %xmm0, %xmm5
355-
; pmullw %xmm5, %xmm1, %xmm5
356-
; pmulhuw %xmm0, %xmm1, %xmm0
355+
; pmullw %xmm1, %xmm5
356+
; pmulhuw %xmm1, %xmm0
357357
; movdqa %xmm0, %xmm2
358358
; movdqa %xmm5, %xmm0
359359
; punpcklwd %xmm0, %xmm2, %xmm0
@@ -390,7 +390,7 @@ block0(v0: i32x4, v1: i32x4):
390390
; block0:
391391
; pshufd $80, %xmm0, %xmm0
392392
; pshufd $80, %xmm1, %xmm5
393-
; pmuludq %xmm0, %xmm5, %xmm0
393+
; pmuludq %xmm5, %xmm0
394394
; movq %rbp, %rsp
395395
; popq %rbp
396396
; ret

cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ block0(v0: i16x8, v1: i16x8):
1111
; pushq %rbp
1212
; movq %rsp, %rbp
1313
; block0:
14-
; pmulhrsw %xmm0, %xmm1, %xmm0
14+
; pmulhrsw %xmm1, %xmm0
1515
; movdqa %xmm0, %xmm5
1616
; pcmpeqw %xmm5, const(0), %xmm5
1717
; pxor %xmm5, %xmm0

0 commit comments

Comments
 (0)