Skip to content

Commit 0203f70

Browse files
committed
Merging r360512:
------------------------------------------------------------------------
r360512 | ctopper | 2019-05-10 21:19:33 -0700 (Fri, 10 May 2019) | 5 lines

[X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1.

We were checking for SSE4.1 for FP types, but not integer 128-bit types.

Fixes PR41837.
------------------------------------------------------------------------

llvm-svn: 360749
1 parent 7c1f15e commit 0203f70

File tree

2 files changed

+57
-17
lines changed

2 files changed

+57
-17
lines changed

llvm/lib/Target/X86/X86FastISel.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
399399
case MVT::v2i64:
400400
case MVT::v8i16:
401401
case MVT::v16i8:
402-
if (IsNonTemporal && Alignment >= 16)
402+
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
403403
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
404404
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
405405
else if (Alignment >= 16)

llvm/test/CodeGen/X86/fast-isel-nontemporal.ll

Lines changed: 56 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -300,10 +300,20 @@ entry:
300300
}
301301

302302
define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) {
303-
; SSE-LABEL: test_load_nt16xi8:
304-
; SSE: # %bb.0: # %entry
305-
; SSE-NEXT: movntdqa (%rdi), %xmm0
306-
; SSE-NEXT: retq
303+
; SSE2-LABEL: test_load_nt16xi8:
304+
; SSE2: # %bb.0: # %entry
305+
; SSE2-NEXT: movdqa (%rdi), %xmm0
306+
; SSE2-NEXT: retq
307+
;
308+
; SSE4A-LABEL: test_load_nt16xi8:
309+
; SSE4A: # %bb.0: # %entry
310+
; SSE4A-NEXT: movdqa (%rdi), %xmm0
311+
; SSE4A-NEXT: retq
312+
;
313+
; SSE41-LABEL: test_load_nt16xi8:
314+
; SSE41: # %bb.0: # %entry
315+
; SSE41-NEXT: movntdqa (%rdi), %xmm0
316+
; SSE41-NEXT: retq
307317
;
308318
; AVX-LABEL: test_load_nt16xi8:
309319
; AVX: # %bb.0: # %entry
@@ -320,10 +330,20 @@ entry:
320330
}
321331

322332
define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) {
323-
; SSE-LABEL: test_load_nt8xi16:
324-
; SSE: # %bb.0: # %entry
325-
; SSE-NEXT: movntdqa (%rdi), %xmm0
326-
; SSE-NEXT: retq
333+
; SSE2-LABEL: test_load_nt8xi16:
334+
; SSE2: # %bb.0: # %entry
335+
; SSE2-NEXT: movdqa (%rdi), %xmm0
336+
; SSE2-NEXT: retq
337+
;
338+
; SSE4A-LABEL: test_load_nt8xi16:
339+
; SSE4A: # %bb.0: # %entry
340+
; SSE4A-NEXT: movdqa (%rdi), %xmm0
341+
; SSE4A-NEXT: retq
342+
;
343+
; SSE41-LABEL: test_load_nt8xi16:
344+
; SSE41: # %bb.0: # %entry
345+
; SSE41-NEXT: movntdqa (%rdi), %xmm0
346+
; SSE41-NEXT: retq
327347
;
328348
; AVX-LABEL: test_load_nt8xi16:
329349
; AVX: # %bb.0: # %entry
@@ -340,10 +360,20 @@ entry:
340360
}
341361

342362
define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) {
343-
; SSE-LABEL: test_load_nt4xi32:
344-
; SSE: # %bb.0: # %entry
345-
; SSE-NEXT: movntdqa (%rdi), %xmm0
346-
; SSE-NEXT: retq
363+
; SSE2-LABEL: test_load_nt4xi32:
364+
; SSE2: # %bb.0: # %entry
365+
; SSE2-NEXT: movdqa (%rdi), %xmm0
366+
; SSE2-NEXT: retq
367+
;
368+
; SSE4A-LABEL: test_load_nt4xi32:
369+
; SSE4A: # %bb.0: # %entry
370+
; SSE4A-NEXT: movdqa (%rdi), %xmm0
371+
; SSE4A-NEXT: retq
372+
;
373+
; SSE41-LABEL: test_load_nt4xi32:
374+
; SSE41: # %bb.0: # %entry
375+
; SSE41-NEXT: movntdqa (%rdi), %xmm0
376+
; SSE41-NEXT: retq
347377
;
348378
; AVX-LABEL: test_load_nt4xi32:
349379
; AVX: # %bb.0: # %entry
@@ -360,10 +390,20 @@ entry:
360390
}
361391

362392
define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) {
363-
; SSE-LABEL: test_load_nt2xi64:
364-
; SSE: # %bb.0: # %entry
365-
; SSE-NEXT: movntdqa (%rdi), %xmm0
366-
; SSE-NEXT: retq
393+
; SSE2-LABEL: test_load_nt2xi64:
394+
; SSE2: # %bb.0: # %entry
395+
; SSE2-NEXT: movdqa (%rdi), %xmm0
396+
; SSE2-NEXT: retq
397+
;
398+
; SSE4A-LABEL: test_load_nt2xi64:
399+
; SSE4A: # %bb.0: # %entry
400+
; SSE4A-NEXT: movdqa (%rdi), %xmm0
401+
; SSE4A-NEXT: retq
402+
;
403+
; SSE41-LABEL: test_load_nt2xi64:
404+
; SSE41: # %bb.0: # %entry
405+
; SSE41-NEXT: movntdqa (%rdi), %xmm0
406+
; SSE41-NEXT: retq
367407
;
368408
; AVX-LABEL: test_load_nt2xi64:
369409
; AVX: # %bb.0: # %entry

0 commit comments

Comments
 (0)