Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15228,11 +15228,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// scalar type is legal. Only do this before legalize ops, since the target
// maybe depending on the bitcast.
// First check to see if this is all constant.
// TODO: Support FP bitcasts after legalize types.
if (VT.isVector() &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TODO can be removed I guess - did you investigate the history behind this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://reviews.llvm.org/D58884, adding what I think is the necessary check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT, the intermediate types for the float version are just integers, so allowing FP seems equivalent to allowing integers. @topperc any reason this would be buggy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@topperc any comment?

(!LegalTypes ||
(!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
TLI.isTypeLegal(VT.getVectorElementType()))) &&
(!LegalOperations && TLI.isTypeLegal(VT.getVectorElementType()))) &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,8 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: movi.2d v2, #0000000000000000
; CHECK-NEXT: str xzr, [x0, #16]
; CHECK-NEXT: uaddlv.4s d1, v0
; CHECK-NEXT: str d2, [x0, #16]
; CHECK-NEXT: mov.d v0[0], v1[0]
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: fcvtn v0.2s, v0.2d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,7 @@ define void @store_v2i16(ptr %a) {
define void @store_v2f16(ptr %a) {
; CHECK-LABEL: store_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: str wzr, [x0]
; CHECK-NEXT: ret
store <2 x half> zeroinitializer, ptr %a
ret void
Expand Down
84 changes: 55 additions & 29 deletions llvm/test/CodeGen/Mips/cconv/vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5337,35 +5337,33 @@ define void @callfloat_2() {
; MIPS32R5-NEXT: jr $ra
; MIPS32R5-NEXT: nop
;
; MIPS64R5-LABEL: callfloat_2:
; MIPS64R5: # %bb.0: # %entry
; MIPS64R5-NEXT: daddiu $sp, $sp, -16
; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
; MIPS64R5-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
; MIPS64R5-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5-NEXT: .cfi_offset 31, -8
; MIPS64R5-NEXT: .cfi_offset 28, -16
; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
; MIPS64R5-NEXT: daddu $1, $1, $25
; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
; MIPS64R5-NEXT: ld $1, %got_page(.LCPI37_0)($gp)
; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI37_0)
; MIPS64R5-NEXT: ld.d $w0, 0($1)
; MIPS64R5-NEXT: copy_s.d $4, $w0[0]
; MIPS64R5-NEXT: ld $1, %got_page(.LCPI37_1)($gp)
; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI37_1)
; MIPS64R5-NEXT: ld.d $w0, 0($1)
; MIPS64R5-NEXT: copy_s.d $5, $w0[0]
; MIPS64R5-NEXT: ld $25, %call16(float2_extern)($gp)
; MIPS64R5-NEXT: jalr $25
; MIPS64R5-NEXT: nop
; MIPS64R5-NEXT: ld $1, %got_disp(gv2f32)($gp)
; MIPS64R5-NEXT: sd $2, 0($1)
; MIPS64R5-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
; MIPS64R5-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; MIPS64R5-NEXT: daddiu $sp, $sp, 16
; MIPS64R5-NEXT: jr $ra
; MIPS64R5-NEXT: nop
; MIPS64R5EB-LABEL: callfloat_2:
; MIPS64R5EB: # %bb.0: # %entry
; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
; MIPS64R5EB-NEXT: daddu $1, $1, $25
; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
; MIPS64R5EB-NEXT: daddiu $1, $zero, 383
; MIPS64R5EB-NEXT: dsll $4, $1, 23
; MIPS64R5EB-NEXT: daddiu $1, $zero, 261
; MIPS64R5EB-NEXT: dsll $1, $1, 33
; MIPS64R5EB-NEXT: daddiu $1, $1, 523
; MIPS64R5EB-NEXT: dsll $5, $1, 21
; MIPS64R5EB-NEXT: ld $25, %call16(float2_extern)($gp)
; MIPS64R5EB-NEXT: jalr $25
; MIPS64R5EB-NEXT: nop
; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2f32)($gp)
; MIPS64R5EB-NEXT: sd $2, 0($1)
; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
; MIPS64EL-LABEL: callfloat_2:
; MIPS64EL: # %bb.0: # %entry
Expand Down Expand Up @@ -5394,6 +5392,34 @@ define void @callfloat_2() {
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
;
; MIPS64R5EL-LABEL: callfloat_2:
; MIPS64R5EL: # %bb.0: # %entry
; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
; MIPS64R5EL-NEXT: daddu $1, $1, $25
; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
; MIPS64R5EL-NEXT: daddiu $1, $zero, 383
; MIPS64R5EL-NEXT: dsll $4, $1, 55
; MIPS64R5EL-NEXT: daddiu $1, $zero, 523
; MIPS64R5EL-NEXT: dsll $1, $1, 31
; MIPS64R5EL-NEXT: daddiu $1, $1, 261
; MIPS64R5EL-NEXT: dsll $5, $1, 22
; MIPS64R5EL-NEXT: ld $25, %call16(float2_extern)($gp)
; MIPS64R5EL-NEXT: jalr $25
; MIPS64R5EL-NEXT: nop
; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2f32)($gp)
; MIPS64R5EL-NEXT: sd $2, 0($1)
; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
%0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
store <2 x float> %0, ptr @gv2f32
Expand Down
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/X86/combine-concatvectors.ll
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX1-NEXT: movq %rcx, 46348(%rax)
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm0 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vmovups %ymm0, 48296(%rax)
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [7.812501848093234E-3,0.0E+0]
; AVX1-NEXT: vmovsd %xmm0, 47372(%rax)
; AVX1-NEXT: movabsq $4575657222473777152, %rcx # imm = 0x3F8000003F800000
; AVX1-NEXT: movq %rcx, 47372(%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand All @@ -61,9 +61,10 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX2-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
; AVX2-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
; AVX2-NEXT: movq %rcx, 46348(%rax)
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [4575657222473777152,4575657222473777152,4575657222473777152,4575657222473777152]
; AVX2-NEXT: vmovups %ymm0, 48296(%rax)
; AVX2-NEXT: vmovlps %xmm0, 47372(%rax)
; AVX2-NEXT: movabsq $4575657222473777152, %rcx # imm = 0x3F8000003F800000
; AVX2-NEXT: movq %rcx, 47372(%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
alloca_0:
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/extractelement-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not an expert on X86 assembly, but this test change and the one above seem to produce more instructions — is that a regression?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's an optimization to save a load. I'm not 100% sure the optimization is sound, but I think it should be addressed there, not here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vector loads vs rematerialization vs scalar constants has never been ideal on x86

; X64-NEXT: vmovq %rax, %xmm2
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/ret-mmx.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ define <2 x i32> @t3() nounwind {
define double @t4() nounwind {
; CHECK-LABEL: t4:
; CHECK: ## %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1,0,0,0]
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.9406564584124654E-324,0.0E+0]
; CHECK-NEXT: retq
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vec_zero_cse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ define void @test2() {
; X86: # %bb.0:
; X86-NEXT: movl $-1, M1+4
; X86-NEXT: movl $-1, M1
; X86-NEXT: pcmpeqd %xmm0, %xmm0
; X86-NEXT: movq %xmm0, M2
; X86-NEXT: movl $-1, M2+4
; X86-NEXT: movl $-1, M2
; X86-NEXT: retl
;
; X64-LABEL: test2:
Expand Down
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,10 @@ define void @PR46178(ptr %0) {
; X86-NEXT: vmovdqu (%eax), %ymm1
; X86-NEXT: vpmovqw %ymm0, %xmm0
; X86-NEXT: vpmovqw %ymm1, %xmm1
; X86-NEXT: vpsllw $8, %xmm0, %xmm0
; X86-NEXT: vpsraw $8, %xmm0, %xmm0
; X86-NEXT: vpsllw $8, %xmm1, %xmm1
; X86-NEXT: vpsraw $8, %xmm1, %xmm1
; X86-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vpsllw $8, %ymm0, %ymm0
; X86-NEXT: vpsraw $8, %ymm0, %ymm0
; X86-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,1]
; X86-NEXT: vmovdqu %ymm0, (%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/widen_shuffle-1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ define void @shuf5(ptr %p) nounwind {
; X86-LABEL: shuf5:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33,0,0,0,0,0,0,0,0]
; X86-NEXT: movsd %xmm0, (%eax)
; X86-NEXT: movl $555819297, 4(%eax) # imm = 0x21212121
; X86-NEXT: movl $555819297, (%eax) # imm = 0x21212121
; X86-NEXT: retl
;
; X64-LABEL: shuf5:
Expand Down