-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[VectorCombine] Try to scalarize vector loads feeding bitcast instructions. #164682
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
6fcd377
34b4f53
03f9c70
401c933
825f1c7
c2db71e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -129,7 +129,9 @@ class VectorCombine { | |||||||||||
| bool foldExtractedCmps(Instruction &I); | ||||||||||||
| bool foldBinopOfReductions(Instruction &I); | ||||||||||||
| bool foldSingleElementStore(Instruction &I); | ||||||||||||
| bool scalarizeLoadExtract(Instruction &I); | ||||||||||||
| bool scalarizeLoad(Instruction &I); | ||||||||||||
| bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr); | ||||||||||||
| bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr); | ||||||||||||
| bool scalarizeExtExtract(Instruction &I); | ||||||||||||
| bool foldConcatOfBoolMasks(Instruction &I); | ||||||||||||
| bool foldPermuteOfBinops(Instruction &I); | ||||||||||||
|
|
@@ -1845,49 +1847,42 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) { | |||||||||||
| return false; | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /// Try to scalarize vector loads feeding extractelement instructions. | ||||||||||||
| bool VectorCombine::scalarizeLoadExtract(Instruction &I) { | ||||||||||||
| if (!TTI.allowVectorElementIndexingUsingGEP()) | ||||||||||||
| return false; | ||||||||||||
|
|
||||||||||||
| /// Try to scalarize vector loads feeding extractelement or bitcast | ||||||||||||
| /// instructions. | ||||||||||||
| bool VectorCombine::scalarizeLoad(Instruction &I) { | ||||||||||||
| Value *Ptr; | ||||||||||||
| if (!match(&I, m_Load(m_Value(Ptr)))) | ||||||||||||
| return false; | ||||||||||||
|
|
||||||||||||
| auto *LI = cast<LoadInst>(&I); | ||||||||||||
| auto *VecTy = cast<VectorType>(LI->getType()); | ||||||||||||
| if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType())) | ||||||||||||
| if (!VecTy || LI->isVolatile() || | ||||||||||||
| !DL->typeSizeEqualsStoreSize(VecTy->getScalarType())) | ||||||||||||
| return false; | ||||||||||||
|
|
||||||||||||
| InstructionCost OriginalCost = | ||||||||||||
| TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(), | ||||||||||||
| LI->getPointerAddressSpace(), CostKind); | ||||||||||||
| InstructionCost ScalarizedCost = 0; | ||||||||||||
|
|
||||||||||||
| // Check what type of users we have and ensure no memory modifications betwwen | ||||||||||||
| // the load and its users. | ||||||||||||
|
||||||||||||
| // Check what type of users we have and ensure no memory modifications betwwen | |
| // the load and its users. | |
| // Check what type of users we have (must either all be extracts or bitcasts) and ensure no memory modifications between | |
| // the load and its users. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (!UI || UI->getParent() != LI->getParent() || UI->use_empty()) | |
| return false; | |
| if (!UI || UI->getParent() != LI->getParent()) | |
| return false; |
also checked below, with explanation in comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, yes, dropped, thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can sink this after the FailGuard, closer to the loop that sets them
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // All bitcasts should target the same scalar type. | |
| // All bitcasts must target the same scalar type. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (!TargetScalarType) | |
| return false; | |
| if (!TargetScalarType) | |
| return false; | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, thanks
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 | ||
| ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -S %s | FileCheck %s | ||
|
|
||
| define i32 @load_v4i8_bitcast_to_i32(ptr %x) { | ||
| ; CHECK-LABEL: define i32 @load_v4i8_bitcast_to_i32( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[R_SCALAR:%.*]] = load i32, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: ret i32 [[R_SCALAR]] | ||
| ; | ||
| %lv = load <4 x i8>, ptr %x | ||
| %r = bitcast <4 x i8> %lv to i32 | ||
| ret i32 %r | ||
| } | ||
|
|
||
| define i64 @load_v2i32_bitcast_to_i64(ptr %x) { | ||
| ; CHECK-LABEL: define i64 @load_v2i32_bitcast_to_i64( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[R_SCALAR:%.*]] = load i64, ptr [[X]], align 8 | ||
| ; CHECK-NEXT: ret i64 [[R_SCALAR]] | ||
| ; | ||
| %lv = load <2 x i32>, ptr %x | ||
| %r = bitcast <2 x i32> %lv to i64 | ||
| ret i64 %r | ||
| } | ||
|
|
||
| define float @load_v4i8_bitcast_to_float(ptr %x) { | ||
| ; CHECK-LABEL: define float @load_v4i8_bitcast_to_float( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[R_SCALAR:%.*]] = load float, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: ret float [[R_SCALAR]] | ||
| ; | ||
| %lv = load <4 x i8>, ptr %x | ||
| %r = bitcast <4 x i8> %lv to float | ||
| ret float %r | ||
| } | ||
|
|
||
| define float @load_v2i16_bitcast_to_float(ptr %x) { | ||
| ; CHECK-LABEL: define float @load_v2i16_bitcast_to_float( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[R_SCALAR:%.*]] = load float, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: ret float [[R_SCALAR]] | ||
| ; | ||
| %lv = load <2 x i16>, ptr %x | ||
| %r = bitcast <2 x i16> %lv to float | ||
| ret float %r | ||
| } | ||
|
|
||
| define double @load_v4i16_bitcast_to_double(ptr %x) { | ||
| ; CHECK-LABEL: define double @load_v4i16_bitcast_to_double( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i16>, ptr [[X]], align 8 | ||
| ; CHECK-NEXT: [[R_SCALAR:%.*]] = bitcast <4 x i16> [[LV]] to double | ||
| ; CHECK-NEXT: ret double [[R_SCALAR]] | ||
| ; | ||
| %lv = load <4 x i16>, ptr %x | ||
| %r = bitcast <4 x i16> %lv to double | ||
| ret double %r | ||
| } | ||
|
|
||
| define double @load_v2i32_bitcast_to_double(ptr %x) { | ||
| ; CHECK-LABEL: define double @load_v2i32_bitcast_to_double( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[LV:%.*]] = load <2 x i32>, ptr [[X]], align 8 | ||
| ; CHECK-NEXT: [[R_SCALAR:%.*]] = bitcast <2 x i32> [[LV]] to double | ||
| ; CHECK-NEXT: ret double [[R_SCALAR]] | ||
| ; | ||
| %lv = load <2 x i32>, ptr %x | ||
| %r = bitcast <2 x i32> %lv to double | ||
| ret double %r | ||
| } | ||
|
|
||
| ; Multiple users with the same bitcast type should be scalarized. | ||
| define i32 @load_v4i8_bitcast_multiple_users_same_type(ptr %x) { | ||
| ; CHECK-LABEL: define i32 @load_v4i8_bitcast_multiple_users_same_type( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[LV_SCALAR:%.*]] = load i32, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV_SCALAR]], [[LV_SCALAR]] | ||
| ; CHECK-NEXT: ret i32 [[ADD]] | ||
| ; | ||
| %lv = load <4 x i8>, ptr %x | ||
| %r1 = bitcast <4 x i8> %lv to i32 | ||
| %r2 = bitcast <4 x i8> %lv to i32 | ||
| %add = add i32 %r1, %r2 | ||
| ret i32 %add | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Aren't other passes likely to have already folded these duplicates, or are you seeing this kind of thing in real-world code?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think for the larger motivating cases this improves results in combination with extend scalarization. @juliannagele, could you add a larger case showing the interaction?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, now that #164683 is in, we can potentially scalarize a load-ext-extend sequence. Added a test that shows this. |
||
|
|
||
| ; Different bitcast types should not be scalarized. | ||
| define i32 @load_v4i8_bitcast_multiple_users_different_types(ptr %x) { | ||
| ; CHECK-LABEL: define i32 @load_v4i8_bitcast_multiple_users_different_types( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i8>, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: [[R1:%.*]] = bitcast <4 x i8> [[LV]] to i32 | ||
| ; CHECK-NEXT: [[R2:%.*]] = bitcast <4 x i8> [[LV]] to float | ||
| ; CHECK-NEXT: [[R2_INT:%.*]] = bitcast float [[R2]] to i32 | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[R1]], [[R2_INT]] | ||
| ; CHECK-NEXT: ret i32 [[ADD]] | ||
| ; | ||
| %lv = load <4 x i8>, ptr %x | ||
| %r1 = bitcast <4 x i8> %lv to i32 | ||
| %r2 = bitcast <4 x i8> %lv to float | ||
| %r2.int = bitcast float %r2 to i32 | ||
| %add = add i32 %r1, %r2.int | ||
| ret i32 %add | ||
| } | ||
|
|
||
| ; Bitcast to vector should not be scalarized. | ||
| define <2 x i16> @load_v4i8_bitcast_to_vector(ptr %x) { | ||
| ; CHECK-LABEL: define <2 x i16> @load_v4i8_bitcast_to_vector( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i8>, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i8> [[LV]] to <2 x i16> | ||
| ; CHECK-NEXT: ret <2 x i16> [[R]] | ||
| ; | ||
| %lv = load <4 x i8>, ptr %x | ||
| %r = bitcast <4 x i8> %lv to <2 x i16> | ||
| ret <2 x i16> %r | ||
| } | ||
|
|
||
| ; Load with both bitcast users and other users should not be scalarized. | ||
| define i32 @load_v4i8_mixed_users(ptr %x) { | ||
| ; CHECK-LABEL: define i32 @load_v4i8_mixed_users( | ||
| ; CHECK-SAME: ptr [[X:%.*]]) { | ||
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i8>, ptr [[X]], align 4 | ||
| ; CHECK-NEXT: [[R1:%.*]] = bitcast <4 x i8> [[LV]] to i32 | ||
| ; CHECK-NEXT: [[R2:%.*]] = extractelement <4 x i8> [[LV]], i32 0 | ||
| ; CHECK-NEXT: [[R2_EXT:%.*]] = zext i8 [[R2]] to i32 | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[R1]], [[R2_EXT]] | ||
| ; CHECK-NEXT: ret i32 [[ADD]] | ||
| ; | ||
| %lv = load <4 x i8>, ptr %x | ||
| %r1 = bitcast <4 x i8> %lv to i32 | ||
| %r2 = extractelement <4 x i8> %lv, i32 0 | ||
| %r2.ext = zext i8 %r2 to i32 | ||
| %add = add i32 %r1, %r2.ext | ||
| ret i32 %add | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need checking now? Can the function be called for scalar loads?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, no, left-over from a first try, dropped, thanks!