-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Fix packed replication of struct types #160274
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
cb03669
767ec04
35cf689
b70ffc2
c2a58e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,7 +166,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl | |
; VF4-NEXT: store <4 x float> [[TMP42]], ptr [[TMP45]], align 4 | ||
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | ||
; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] | ||
; VF4: [[MIDDLE_BLOCK]]: | ||
; | ||
; VF2IC2-LABEL: define void @struct_return_2xf32_replicate( | ||
|
@@ -233,7 +233,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl | |
; VF2IC2-NEXT: store <2 x float> [[TMP44]], ptr [[TMP50]], align 4 | ||
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
; VF2IC2-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | ||
; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] | ||
; VF2IC2: [[MIDDLE_BLOCK]]: | ||
; | ||
entry: | ||
|
@@ -336,7 +336,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl | |
; VF4-NEXT: store <4 x i32> [[TMP63]], ptr [[TMP64]], align 4 | ||
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
; VF4-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | ||
; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] | ||
; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; VF4: [[MIDDLE_BLOCK]]: | ||
; | ||
; VF2IC2-LABEL: define void @struct_return_3xi32_replicate( | ||
|
@@ -425,7 +425,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl | |
; VF2IC2-NEXT: store <2 x i32> [[TMP68]], ptr [[TMP71]], align 4 | ||
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
; VF2IC2-NEXT: [[TMP72:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | ||
; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] | ||
; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
lukel97 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
; VF2IC2: [[MIDDLE_BLOCK]]: | ||
; | ||
entry: | ||
|
@@ -453,6 +453,224 @@ exit: | |
ret void | ||
} | ||
|
||
define void @struct_return_2xf32_replicate_predicated(ptr %a) { | ||
; CHECK-LABEL: define void @scalarized_predicated_struct_return | ||
; CHECK: vector.body: | ||
; CHECK: [[WIDE_CALL:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]]) | ||
lukel97 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
; VF4-LABEL: define void @struct_return_2xf32_replicate_predicated( | ||
; VF4-SAME: ptr [[A:%.*]]) { | ||
; VF4-NEXT: [[ENTRY:.*:]] | ||
; VF4-NEXT: br label %[[VECTOR_PH:.*]] | ||
; VF4: [[VECTOR_PH]]: | ||
; VF4-NEXT: br label %[[VECTOR_BODY:.*]] | ||
; VF4: [[VECTOR_BODY]]: | ||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] | ||
; VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] | ||
; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 8 | ||
; VF4-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer | ||
; VF4-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 | ||
; VF4-NEXT: br i1 [[TMP2]], label %[[PRED_CALL_IF:.*]], label %[[PRED_CALL_CONTINUE:.*]] | ||
; VF4: [[PRED_CALL_IF]]: | ||
; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0 | ||
; VF4-NEXT: [[TMP4:%.*]] = tail call { float, float } @fn2(float [[TMP3]]) #[[ATTR3:[0-9]+]] | ||
; VF4-NEXT: [[TMP5:%.*]] = extractvalue { float, float } [[TMP4]], 0 | ||
; VF4-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0 | ||
; VF4-NEXT: [[TMP7:%.*]] = insertvalue { <4 x float>, <4 x float> } poison, <4 x float> [[TMP6]], 0 | ||
; VF4-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP4]], 1 | ||
; VF4-NEXT: [[TMP9:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP7]], 1 | ||
; VF4-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP8]], i32 0 | ||
; VF4-NEXT: [[TMP11:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP7]], <4 x float> [[TMP10]], 1 | ||
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE]] | ||
; VF4: [[PRED_CALL_CONTINUE]]: | ||
; VF4-NEXT: [[TMP12:%.*]] = phi { <4 x float>, <4 x float> } [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_CALL_IF]] ] | ||
; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 | ||
; VF4-NEXT: br i1 [[TMP13]], label %[[PRED_CALL_IF1:.*]], label %[[PRED_CALL_CONTINUE2:.*]] | ||
; VF4: [[PRED_CALL_IF1]]: | ||
; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1 | ||
; VF4-NEXT: [[TMP15:%.*]] = tail call { float, float } @fn2(float [[TMP14]]) #[[ATTR3]] | ||
; VF4-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP15]], 0 | ||
; VF4-NEXT: [[TMP17:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP12]], 0 | ||
; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP16]], i32 1 | ||
; VF4-NEXT: [[TMP19:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0 | ||
; VF4-NEXT: [[TMP20:%.*]] = extractvalue { float, float } [[TMP15]], 1 | ||
; VF4-NEXT: [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP19]], 1 | ||
; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP20]], i32 1 | ||
; VF4-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP19]], <4 x float> [[TMP22]], 1 | ||
Comment on lines
+488
to
+495
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we account for the cost of this sequence somewhat accurately? I think you mentioned that you discovered an end-to-end crash for this. From looking at the code we need to generate, I am curious whether there are cases where this is ever profitable. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think it is. All the crashes happened in test files with -force-vector-width. It just so happened that changing the predication discount with BFI changed the scalarization decision, which led to this in struct-return.ll |
||
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE2]] | ||
; VF4: [[PRED_CALL_CONTINUE2]]: | ||
; VF4-NEXT: [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP19]], %[[PRED_CALL_IF1]] ] | ||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 | ||
; VF4-NEXT: br i1 [[TMP25]], label %[[PRED_CALL_IF3:.*]], label %[[PRED_CALL_CONTINUE4:.*]] | ||
; VF4: [[PRED_CALL_IF3]]: | ||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2 | ||
; VF4-NEXT: [[TMP27:%.*]] = tail call { float, float } @fn2(float [[TMP26]]) #[[ATTR3]] | ||
; VF4-NEXT: [[TMP28:%.*]] = extractvalue { float, float } [[TMP27]], 0 | ||
; VF4-NEXT: [[TMP29:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP24]], 0 | ||
; VF4-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP28]], i32 2 | ||
; VF4-NEXT: [[TMP31:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP24]], <4 x float> [[TMP30]], 0 | ||
; VF4-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP27]], 1 | ||
; VF4-NEXT: [[TMP33:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP31]], 1 | ||
; VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP32]], i32 2 | ||
; VF4-NEXT: [[TMP35:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP31]], <4 x float> [[TMP34]], 1 | ||
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE4]] | ||
; VF4: [[PRED_CALL_CONTINUE4]]: | ||
; VF4-NEXT: [[TMP36:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP24]], %[[PRED_CALL_CONTINUE2]] ], [ [[TMP31]], %[[PRED_CALL_IF3]] ] | ||
; VF4-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 | ||
; VF4-NEXT: br i1 [[TMP37]], label %[[PRED_CALL_IF5:.*]], label %[[PRED_CALL_CONTINUE6:.*]] | ||
; VF4: [[PRED_CALL_IF5]]: | ||
; VF4-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3 | ||
; VF4-NEXT: [[TMP39:%.*]] = tail call { float, float } @fn2(float [[TMP38]]) #[[ATTR3]] | ||
; VF4-NEXT: [[TMP40:%.*]] = extractvalue { float, float } [[TMP39]], 0 | ||
; VF4-NEXT: [[TMP41:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP36]], 0 | ||
; VF4-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP40]], i32 3 | ||
; VF4-NEXT: [[TMP43:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP36]], <4 x float> [[TMP42]], 0 | ||
; VF4-NEXT: [[TMP44:%.*]] = extractvalue { float, float } [[TMP39]], 1 | ||
; VF4-NEXT: [[TMP45:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP43]], 1 | ||
; VF4-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP44]], i32 3 | ||
; VF4-NEXT: [[TMP47:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP43]], <4 x float> [[TMP46]], 1 | ||
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE6]] | ||
; VF4: [[PRED_CALL_CONTINUE6]]: | ||
; VF4-NEXT: [[TMP48:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP36]], %[[PRED_CALL_CONTINUE4]] ], [ [[TMP43]], %[[PRED_CALL_IF5]] ] | ||
; VF4-NEXT: [[TMP49:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP48]], 0 | ||
; VF4-NEXT: [[TMP50:%.*]] = fdiv <4 x float> [[TMP49]], [[WIDE_LOAD]] | ||
; VF4-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 | ||
; VF4-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] | ||
; VF4: [[PRED_STORE_IF]]: | ||
; VF4-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], 0 | ||
; VF4-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]] | ||
; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0 | ||
; VF4-NEXT: store float [[TMP54]], ptr [[TMP53]], align 8 | ||
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]] | ||
; VF4: [[PRED_STORE_CONTINUE]]: | ||
; VF4-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 | ||
; VF4-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] | ||
; VF4: [[PRED_STORE_IF7]]: | ||
; VF4-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 1 | ||
; VF4-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]] | ||
; VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x float> [[TMP50]], i32 1 | ||
; VF4-NEXT: store float [[TMP58]], ptr [[TMP57]], align 8 | ||
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE8]] | ||
; VF4: [[PRED_STORE_CONTINUE8]]: | ||
; VF4-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 | ||
; VF4-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] | ||
; VF4: [[PRED_STORE_IF9]]: | ||
; VF4-NEXT: [[TMP60:%.*]] = add i64 [[INDEX]], 2 | ||
; VF4-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP60]] | ||
; VF4-NEXT: [[TMP62:%.*]] = extractelement <4 x float> [[TMP50]], i32 2 | ||
; VF4-NEXT: store float [[TMP62]], ptr [[TMP61]], align 8 | ||
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE10]] | ||
; VF4: [[PRED_STORE_CONTINUE10]]: | ||
; VF4-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 | ||
; VF4-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] | ||
; VF4: [[PRED_STORE_IF11]]: | ||
; VF4-NEXT: [[TMP64:%.*]] = add i64 [[INDEX]], 3 | ||
; VF4-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP64]] | ||
; VF4-NEXT: [[TMP66:%.*]] = extractelement <4 x float> [[TMP50]], i32 3 | ||
; VF4-NEXT: store float [[TMP66]], ptr [[TMP65]], align 8 | ||
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE12]] | ||
; VF4: [[PRED_STORE_CONTINUE12]]: | ||
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
; VF4-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | ||
; VF4-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] | ||
; VF4: [[MIDDLE_BLOCK]]: | ||
; | ||
; VF2IC2-LABEL: define void @struct_return_2xf32_replicate_predicated( | ||
; VF2IC2-SAME: ptr [[A:%.*]]) { | ||
; VF2IC2-NEXT: [[ENTRY:.*:]] | ||
; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] | ||
; VF2IC2: [[VECTOR_PH]]: | ||
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] | ||
; VF2IC2: [[VECTOR_BODY]]: | ||
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] | ||
; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] | ||
; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 | ||
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 8 | ||
; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP1]], align 8 | ||
; VF2IC2-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], zeroinitializer | ||
; VF2IC2-NEXT: [[TMP3:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], zeroinitializer | ||
; VF2IC2-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 | ||
; VF2IC2-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] | ||
; VF2IC2: [[PRED_STORE_IF]]: | ||
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 | ||
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]] | ||
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 | ||
; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0 | ||
; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] | ||
; VF2IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 | ||
; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]] | ||
; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP9]], align 8 | ||
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] | ||
; VF2IC2: [[PRED_STORE_CONTINUE]]: | ||
; VF2IC2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 | ||
; VF2IC2-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] | ||
; VF2IC2: [[PRED_STORE_IF2]]: | ||
; VF2IC2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 | ||
; VF2IC2-NEXT: [[TMP14:%.*]] = tail call { float, float } @fn2(float [[TMP13]]) #[[ATTR3]] | ||
; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1 | ||
; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0 | ||
; VF2IC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] | ||
; VF2IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 | ||
; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]] | ||
; VF2IC2-NEXT: store float [[TMP19]], ptr [[TMP17]], align 8 | ||
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]] | ||
; VF2IC2: [[PRED_STORE_CONTINUE3]]: | ||
; VF2IC2-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 | ||
; VF2IC2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] | ||
; VF2IC2: [[PRED_STORE_IF4]]: | ||
; VF2IC2-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0 | ||
; VF2IC2-NEXT: [[TMP22:%.*]] = tail call { float, float } @fn2(float [[TMP21]]) #[[ATTR3]] | ||
; VF2IC2-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 2 | ||
; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0 | ||
; VF2IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]] | ||
; VF2IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0 | ||
; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]] | ||
; VF2IC2-NEXT: store float [[TMP27]], ptr [[TMP25]], align 8 | ||
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]] | ||
; VF2IC2: [[PRED_STORE_CONTINUE5]]: | ||
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 | ||
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] | ||
; VF2IC2: [[PRED_STORE_IF6]]: | ||
; VF2IC2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1 | ||
; VF2IC2-NEXT: [[TMP30:%.*]] = tail call { float, float } @fn2(float [[TMP29]]) #[[ATTR3]] | ||
; VF2IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3 | ||
; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0 | ||
; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] | ||
; VF2IC2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1 | ||
; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]] | ||
; VF2IC2-NEXT: store float [[TMP35]], ptr [[TMP33]], align 8 | ||
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]] | ||
; VF2IC2: [[PRED_STORE_CONTINUE7]]: | ||
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
; VF2IC2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | ||
; VF2IC2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] | ||
; VF2IC2: [[MIDDLE_BLOCK]]: | ||
; | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] | ||
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv | ||
%in_val = load float, ptr %arrayidx, align 8 | ||
%sgt_zero = fcmp ogt float %in_val, 0.0 | ||
br i1 %sgt_zero, label %if.then, label %for.inc | ||
|
||
if.then: | ||
%call = tail call { float, float } @fn2(float %in_val) #3 | ||
%extract_a = extractvalue { float, float } %call, 0 | ||
%div = fdiv float %extract_a, %in_val | ||
store float %div, ptr %arrayidx, align 8 | ||
br label %for.inc | ||
|
||
for.inc: | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
%exitcond.not = icmp eq i64 %iv.next, 1024 | ||
br i1 %exitcond.not, label %exit, label %for.body | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
declare { i64 } @fn1(float) | ||
declare { float, float } @fn2(float) | ||
declare { i32, i32, i32 } @fn3(i32) | ||
|
@@ -464,3 +682,4 @@ declare { <8 x i32>, <8 x i32>, <8 x i32> } @fixed_vec_fn3(<8 x i32>) | |
; Attribute groups that attach a "vector-function-abi-variant" mapping to the
; scalar struct-returning callees. Groups #0-#2 carry an 'N' in the mangled
; prefix (_ZGVnN8v_*), whereas #3 carries an 'M' (_ZGVnM8v_fn2); in the vector
; function ABI name mangling that position is the mask token, with 'M' marking
; a masked variant and 'N' an unmasked one. #3 is the group referenced by the
; conditional @fn2 call in @struct_return_2xf32_replicate_predicated.
; NOTE(review): #1 and #3 both redirect to fixed_vec_fn2; whether that
; declaration takes a mask operand is not visible here - confirm.
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn1(fixed_vec_fn1)" } | ||
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn2(fixed_vec_fn2)" } | ||
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn3(fixed_vec_fn3)" } | ||
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVnM8v_fn2(fixed_vec_fn2)" } |
Uh oh!
There was an error while loading. Please reload this page.