Skip to content

Commit 8386a1b

Browse files
committed
x64: convert SSE multiplications
1 parent 5a47b11 commit 8386a1b

File tree

3 files changed: 28 additions and 39 deletions.

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 11 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -3612,59 +3612,52 @@
36123612

36133613
;; Helper for creating `pmullw` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmullw`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmullw`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmullw_a`.
;; NOTE(review): `x64_pmullw_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
36143614
(decl x64_pmullw (Xmm XmmMem) Xmm)
3615-
(rule 0 (x64_pmullw src1 src2)
3616-
(xmm_rm_r (SseOpcode.Pmullw) src1 src2))
36173615
(rule 1 (x64_pmullw src1 src2)
36183616
(if-let true (use_avx))
36193617
(xmm_rmir_vex (AvxOpcode.Vpmullw) src1 src2))
3618+
(rule 0 (x64_pmullw src1 src2) (x64_pmullw_a src1 src2))
36203619

36213620
;; Helper for creating `pmulld` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmulld`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmulld`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmulld_a`.
;; NOTE(review): `x64_pmulld_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
36223621
(decl x64_pmulld (Xmm XmmMem) Xmm)
3623-
(rule 0 (x64_pmulld src1 src2)
3624-
(xmm_rm_r (SseOpcode.Pmulld) src1 src2))
36253622
(rule 1 (x64_pmulld src1 src2)
36263623
(if-let true (use_avx))
36273624
(xmm_rmir_vex (AvxOpcode.Vpmulld) src1 src2))
3625+
(rule 0 (x64_pmulld src1 src2) (x64_pmulld_a src1 src2))
36283626

36293627
;; Helper for creating `pmulhw` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmulhw`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmulhw`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmulhw_a`.
;; NOTE(review): `x64_pmulhw_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
36303628
(decl x64_pmulhw (Xmm XmmMem) Xmm)
3631-
(rule 0 (x64_pmulhw src1 src2)
3632-
(xmm_rm_r (SseOpcode.Pmulhw) src1 src2))
36333629
(rule 1 (x64_pmulhw src1 src2)
36343630
(if-let true (use_avx))
36353631
(xmm_rmir_vex (AvxOpcode.Vpmulhw) src1 src2))
3632+
(rule 0 (x64_pmulhw src1 src2) (x64_pmulhw_a src1 src2))
36363633

36373634
;; Helper for creating `pmulhrsw` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmulhrsw`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmulhrsw`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmulhrsw_a`.
;; NOTE(review): `x64_pmulhrsw_a` is defined outside this excerpt; presumably
;; it is the replacement non-AVX lowering -- confirm against its definition.
36383635
(decl x64_pmulhrsw (Xmm XmmMem) Xmm)
3639-
(rule 0 (x64_pmulhrsw src1 src2)
3640-
(xmm_rm_r (SseOpcode.Pmulhrsw) src1 src2))
36413636
(rule 1 (x64_pmulhrsw src1 src2)
36423637
(if-let true (use_avx))
36433638
(xmm_rmir_vex (AvxOpcode.Vpmulhrsw) src1 src2))
3639+
(rule 0 (x64_pmulhrsw src1 src2) (x64_pmulhrsw_a src1 src2))
36443640

36453641
;; Helper for creating `pmulhuw` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmulhuw`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmulhuw`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmulhuw_a`.
;; NOTE(review): `x64_pmulhuw_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
36463642
(decl x64_pmulhuw (Xmm XmmMem) Xmm)
3647-
(rule 0 (x64_pmulhuw src1 src2)
3648-
(xmm_rm_r (SseOpcode.Pmulhuw) src1 src2))
36493643
(rule 1 (x64_pmulhuw src1 src2)
36503644
(if-let true (use_avx))
36513645
(xmm_rmir_vex (AvxOpcode.Vpmulhuw) src1 src2))
3646+
(rule 0 (x64_pmulhuw src1 src2) (x64_pmulhuw_a src1 src2))
36523647

36533648
;; Helper for creating `pmuldq` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmuldq`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmuldq`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmuldq_a`.
;; NOTE(review): `x64_pmuldq_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
36543649
(decl x64_pmuldq (Xmm XmmMem) Xmm)
3655-
(rule 0 (x64_pmuldq src1 src2)
3656-
(xmm_rm_r (SseOpcode.Pmuldq) src1 src2))
36573650
(rule 1 (x64_pmuldq src1 src2)
36583651
(if-let true (use_avx))
36593652
(xmm_rmir_vex (AvxOpcode.Vpmuldq) src1 src2))
3653+
(rule 0 (x64_pmuldq src1 src2) (x64_pmuldq_a src1 src2))
36603654

36613655
;; Helper for creating `pmuludq` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Pmuludq`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vpmuludq`.
;; * rule 0 (new) forwards both operands unchanged to `x64_pmuludq_a`.
;; NOTE(review): `x64_pmuludq_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
36623656
(decl x64_pmuludq (Xmm XmmMem) Xmm)
3663-
(rule 0 (x64_pmuludq src1 src2)
3664-
(xmm_rm_r (SseOpcode.Pmuludq) src1 src2))
36653657
(rule 1 (x64_pmuludq src1 src2)
36663658
(if-let true (use_avx))
36673659
(xmm_rmir_vex (AvxOpcode.Vpmuludq) src1 src2))
3660+
(rule 0 (x64_pmuludq src1 src2) (x64_pmuludq_a src1 src2))
36683661

36693662
;; Helper for creating `punpckhwd` instructions.
36703663
(decl x64_punpckhwd (Xmm XmmMem) Xmm)
@@ -3817,35 +3810,31 @@
38173810

38183811
;; Helper for creating `mulss` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which was written
;; without an explicit priority and went through `xmm_rm_r_unaligned` with
;; `SseOpcode.Mulss`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vmulss`.
;; * rule 0 (new, now with an explicit priority) forwards both operands
;;   unchanged to `x64_mulss_a`.
;; NOTE(review): `x64_mulss_a` is defined outside this excerpt; verify that it
;; keeps whatever memory-operand handling `xmm_rm_r_unaligned` provided.
38193812
(decl x64_mulss (Xmm XmmMem) Xmm)
3820-
(rule (x64_mulss src1 src2)
3821-
(xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2))
38223813
(rule 1 (x64_mulss src1 src2)
38233814
(if-let true (use_avx))
38243815
(xmm_rmir_vex (AvxOpcode.Vmulss) src1 src2))
3816+
(rule 0 (x64_mulss src1 src2) (x64_mulss_a src1 src2))
38253817

38263818
;; Helper for creating `mulsd` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which was written
;; without an explicit priority and went through `xmm_rm_r_unaligned` with
;; `SseOpcode.Mulsd`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vmulsd`.
;; * rule 0 (new, now with an explicit priority) forwards both operands
;;   unchanged to `x64_mulsd_a`.
;; NOTE(review): `x64_mulsd_a` is defined outside this excerpt; verify that it
;; keeps whatever memory-operand handling `xmm_rm_r_unaligned` provided.
38273819
(decl x64_mulsd (Xmm XmmMem) Xmm)
3828-
(rule (x64_mulsd src1 src2)
3829-
(xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2))
38303820
(rule 1 (x64_mulsd src1 src2)
38313821
(if-let true (use_avx))
38323822
(xmm_rmir_vex (AvxOpcode.Vmulsd) src1 src2))
3823+
(rule 0 (x64_mulsd src1 src2) (x64_mulsd_a src1 src2))
38333824

38343825
;; Helper for creating `mulps` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which went through
;; `xmm_rm_r` with `SseOpcode.Mulps`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vmulps`.
;; * rule 0 (new) forwards both operands unchanged to `x64_mulps_a`.
;; NOTE(review): `x64_mulps_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
38353826
(decl x64_mulps (Xmm XmmMem) Xmm)
3836-
(rule 0 (x64_mulps src1 src2)
3837-
(xmm_rm_r (SseOpcode.Mulps) src1 src2))
38383827
(rule 1 (x64_mulps src1 src2)
38393828
(if-let true (use_avx))
38403829
(xmm_rmir_vex (AvxOpcode.Vmulps) src1 src2))
3830+
(rule 0 (x64_mulps src1 src2) (x64_mulps_a src1 src2))
38413831

38423832
;; Helper for creating `mulpd` instructions.
;;
;; Takes an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
;; Diff view: the lines tagged `-` are the removed SSE rule, which was written
;; without an explicit priority and went through `xmm_rm_r` with
;; `SseOpcode.Mulpd`; the line tagged `+` is its replacement.
;; * rule 1 is guarded by `(if-let true (use_avx))` and emits the VEX form via
;;   `xmm_rmir_vex` with `AvxOpcode.Vmulpd`.
;; * rule 0 (new, now with an explicit priority) forwards both operands
;;   unchanged to `x64_mulpd_a`.
;; NOTE(review): `x64_mulpd_a` is defined outside this excerpt; presumably it
;; is the replacement non-AVX lowering -- confirm against its definition.
38433833
(decl x64_mulpd (Xmm XmmMem) Xmm)
3844-
(rule (x64_mulpd src1 src2)
3845-
(xmm_rm_r (SseOpcode.Mulpd) src1 src2))
38463834
(rule 1 (x64_mulpd src1 src2)
38473835
(if-let true (use_avx))
38483836
(xmm_rmir_vex (AvxOpcode.Vmulpd) src1 src2))
3837+
(rule 0 (x64_mulpd src1 src2) (x64_mulpd_a src1 src2))
38493838

38503839
;; Helper for creating `divss` instructions.
38513840
(decl x64_divss (Xmm XmmMem) Xmm)

cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ block0(v0: i8x16, v1: i8x16):
1717
; pmovsxbw %xmm0, %xmm0
1818
; palignr $8, %xmm1, %xmm1, %xmm1
1919
; pmovsxbw %xmm1, %xmm1
20-
; pmullw %xmm0, %xmm1, %xmm0
20+
; pmullw %xmm1, %xmm0
2121
; movq %rbp, %rsp
2222
; popq %rbp
2323
; ret
@@ -49,8 +49,8 @@ block0(v0: i16x8, v1: i16x8):
4949
; movq %rsp, %rbp
5050
; block0:
5151
; movdqa %xmm0, %xmm5
52-
; pmullw %xmm5, %xmm1, %xmm5
53-
; pmulhw %xmm0, %xmm1, %xmm0
52+
; pmullw %xmm1, %xmm5
53+
; pmulhw %xmm1, %xmm0
5454
; movdqa %xmm0, %xmm2
5555
; movdqa %xmm5, %xmm0
5656
; punpckhwd %xmm0, %xmm2, %xmm0
@@ -87,7 +87,7 @@ block0(v0: i32x4, v1: i32x4):
8787
; block0:
8888
; pshufd $250, %xmm0, %xmm0
8989
; pshufd $250, %xmm1, %xmm5
90-
; pmuldq %xmm0, %xmm5, %xmm0
90+
; pmuldq %xmm5, %xmm0
9191
; movq %rbp, %rsp
9292
; popq %rbp
9393
; ret
@@ -118,7 +118,7 @@ block0(v0: i8x16, v1: i8x16):
118118
; block0:
119119
; pmovsxbw %xmm0, %xmm0
120120
; pmovsxbw %xmm1, %xmm5
121-
; pmullw %xmm0, %xmm5, %xmm0
121+
; pmullw %xmm5, %xmm0
122122
; movq %rbp, %rsp
123123
; popq %rbp
124124
; ret
@@ -148,8 +148,8 @@ block0(v0: i16x8, v1: i16x8):
148148
; movq %rsp, %rbp
149149
; block0:
150150
; movdqa %xmm0, %xmm5
151-
; pmullw %xmm5, %xmm1, %xmm5
152-
; pmulhw %xmm0, %xmm1, %xmm0
151+
; pmullw %xmm1, %xmm5
152+
; pmulhw %xmm1, %xmm0
153153
; movdqa %xmm0, %xmm2
154154
; movdqa %xmm5, %xmm0
155155
; punpcklwd %xmm0, %xmm2, %xmm0
@@ -186,7 +186,7 @@ block0(v0: i32x4, v1: i32x4):
186186
; block0:
187187
; pshufd $80, %xmm0, %xmm0
188188
; pshufd $80, %xmm1, %xmm5
189-
; pmuldq %xmm0, %xmm5, %xmm0
189+
; pmuldq %xmm5, %xmm0
190190
; movq %rbp, %rsp
191191
; popq %rbp
192192
; ret
@@ -221,7 +221,7 @@ block0(v0: i8x16, v1: i8x16):
221221
; uninit %xmm2
222222
; pxor %xmm2, %xmm2
223223
; punpckhbw %xmm1, %xmm2, %xmm1
224-
; pmullw %xmm0, %xmm1, %xmm0
224+
; pmullw %xmm1, %xmm0
225225
; movq %rbp, %rsp
226226
; popq %rbp
227227
; ret
@@ -253,8 +253,8 @@ block0(v0: i16x8, v1: i16x8):
253253
; movq %rsp, %rbp
254254
; block0:
255255
; movdqa %xmm0, %xmm5
256-
; pmullw %xmm5, %xmm1, %xmm5
257-
; pmulhuw %xmm0, %xmm1, %xmm0
256+
; pmullw %xmm1, %xmm5
257+
; pmulhuw %xmm1, %xmm0
258258
; movdqa %xmm0, %xmm2
259259
; movdqa %xmm5, %xmm0
260260
; punpckhwd %xmm0, %xmm2, %xmm0
@@ -291,7 +291,7 @@ block0(v0: i32x4, v1: i32x4):
291291
; block0:
292292
; pshufd $250, %xmm0, %xmm0
293293
; pshufd $250, %xmm1, %xmm5
294-
; pmuludq %xmm0, %xmm5, %xmm0
294+
; pmuludq %xmm5, %xmm0
295295
; movq %rbp, %rsp
296296
; popq %rbp
297297
; ret
@@ -322,7 +322,7 @@ block0(v0: i8x16, v1: i8x16):
322322
; block0:
323323
; pmovzxbw %xmm0, %xmm0
324324
; pmovzxbw %xmm1, %xmm5
325-
; pmullw %xmm0, %xmm5, %xmm0
325+
; pmullw %xmm5, %xmm0
326326
; movq %rbp, %rsp
327327
; popq %rbp
328328
; ret
@@ -352,8 +352,8 @@ block0(v0: i16x8, v1: i16x8):
352352
; movq %rsp, %rbp
353353
; block0:
354354
; movdqa %xmm0, %xmm5
355-
; pmullw %xmm5, %xmm1, %xmm5
356-
; pmulhuw %xmm0, %xmm1, %xmm0
355+
; pmullw %xmm1, %xmm5
356+
; pmulhuw %xmm1, %xmm0
357357
; movdqa %xmm0, %xmm2
358358
; movdqa %xmm5, %xmm0
359359
; punpcklwd %xmm0, %xmm2, %xmm0
@@ -390,7 +390,7 @@ block0(v0: i32x4, v1: i32x4):
390390
; block0:
391391
; pshufd $80, %xmm0, %xmm0
392392
; pshufd $80, %xmm1, %xmm5
393-
; pmuludq %xmm0, %xmm5, %xmm0
393+
; pmuludq %xmm5, %xmm0
394394
; movq %rbp, %rsp
395395
; popq %rbp
396396
; ret

cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ block0(v0: i16x8, v1: i16x8):
1111
; pushq %rbp
1212
; movq %rsp, %rbp
1313
; block0:
14-
; pmulhrsw %xmm0, %xmm1, %xmm0
14+
; pmulhrsw %xmm1, %xmm0
1515
; movdqa %xmm0, %xmm5
1616
; pcmpeqw %xmm5, const(0), %xmm5
1717
; pxor %xmm5, %xmm0

0 commit comments

Comments
 (0)