Skip to content

Commit f1147a0

Browse files
committed
rebase
Created using spr 1.3.8-beta.1
2 parents f8f1935 + 6da06ac commit f1147a0

File tree

4 files changed: 28 additions (+28) and 24 deletions (-24).

clang/lib/Headers/avx512fp16intrin.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -193,17 +193,17 @@ _mm512_castsi512_ph(__m512i __a) {
193193
return (__m512h)__a;
194194
}
195195

196-
static __inline__ __m128h __DEFAULT_FN_ATTRS256
196+
static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
197197
_mm256_castph256_ph128(__m256h __a) {
198198
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
199199
}
200200

201-
static __inline__ __m128h __DEFAULT_FN_ATTRS512
201+
static __inline__ __m128h __DEFAULT_FN_ATTRS512_CONSTEXPR
202202
_mm512_castph512_ph128(__m512h __a) {
203203
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
204204
}
205205

206-
static __inline__ __m256h __DEFAULT_FN_ATTRS512
206+
static __inline__ __m256h __DEFAULT_FN_ATTRS512_CONSTEXPR
207207
_mm512_castph512_ph256(__m512h __a) {
208208
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
209209
12, 13, 14, 15);

clang/test/CodeGen/X86/avx512fp16-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -341,18 +341,21 @@ __m128h test_mm256_castph256_ph128(__m256h __a) {
341341
// CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
342342
return _mm256_castph256_ph128(__a);
343343
}
344+
TEST_CONSTEXPR(match_m128h(_mm256_castph256_ph128((__m256h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0}), -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0));
344345

345346
__m128h test_mm512_castph512_ph128(__m512h __a) {
346347
// CHECK-LABEL: test_mm512_castph512_ph128
347348
// CHECK: shufflevector <32 x half> %{{.*}}, <32 x half> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
348349
return _mm512_castph512_ph128(__a);
349350
}
351+
TEST_CONSTEXPR(match_m128h(_mm512_castph512_ph128((__m512h){0.0, -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0, -17.0, 18.0, -19.0, 20.0, -21.0, 22.0, -23.0, 24.0, -25.0, 26.0, -27.0, 28.0, -29.0, 30.0, -31.0}), 0.0, -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0));
350352

351353
__m256h test_mm512_castph512_ph256(__m512h __a) {
352354
// CHECK-LABEL: test_mm512_castph512_ph256
353355
// CHECK: shufflevector <32 x half> %{{.*}}, <32 x half> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
354356
return _mm512_castph512_ph256(__a);
355357
}
358+
TEST_CONSTEXPR(match_m256h(_mm512_castph512_ph256((__m512h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0, -17.0, 18.0, -19.0, 20.0, -21.0, 22.0, -23.0, 24.0, -25.0, 26.0, -27.0, 28.0, -29.0, 30.0, -31.0, 32.0}), -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0));
356359

357360
__m256h test_mm256_castph128_ph256(__m128h __a) {
358361
// CHECK-LABEL: test_mm256_castph128_ph256

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2662,7 +2662,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
26622662
G->eraseFromParent();
26632663
NewGlobals[i] = NewGlobal;
26642664

2665-
Constant *ODRIndicator = ConstantPointerNull::get(PtrTy);
2665+
Constant *ODRIndicator = Constant::getNullValue(IntptrTy);
26662666
GlobalValue *InstrumentedGlobal = NewGlobal;
26672667

26682668
bool CanUsePrivateAliases =
@@ -2677,8 +2677,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
26772677

26782678
// ODR should not happen for local linkage.
26792679
if (NewGlobal->hasLocalLinkage()) {
2680-
ODRIndicator =
2681-
ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1), PtrTy);
2680+
ODRIndicator = ConstantInt::get(IntptrTy, -1);
26822681
} else if (UseOdrIndicator) {
26832682
// With local aliases, we need to provide another externally visible
26842683
// symbol __odr_asan_XXX to detect ODR violation.
@@ -2692,7 +2691,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
26922691
ODRIndicatorSym->setVisibility(NewGlobal->getVisibility());
26932692
ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
26942693
ODRIndicatorSym->setAlignment(Align(1));
2695-
ODRIndicator = ODRIndicatorSym;
2694+
ODRIndicator = ConstantExpr::getPtrToInt(ODRIndicatorSym, IntptrTy);
26962695
}
26972696

26982697
Constant *Initializer = ConstantStruct::get(
@@ -2703,8 +2702,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
27032702
ConstantExpr::getPointerCast(Name, IntptrTy),
27042703
ConstantExpr::getPointerCast(getOrCreateModuleName(), IntptrTy),
27052704
ConstantInt::get(IntptrTy, MD.IsDynInit),
2706-
Constant::getNullValue(IntptrTy),
2707-
ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
2705+
Constant::getNullValue(IntptrTy), ODRIndicator);
27082706

27092707
LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
27102708

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,8 @@ class DFAJumpThreading {
158158
void
159159
unfoldSelectInstrs(DominatorTree *DT,
160160
const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
161-
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
161+
// TODO: Have everything use a single lazy DTU
162+
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
162163
SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
163164

164165
while (!Stack.empty()) {
@@ -820,11 +821,13 @@ struct TransformDFA {
820821
: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
821822
EphValues(EphValues) {}
822823

823-
void run() {
824+
bool run() {
824825
if (isLegalAndProfitableToTransform()) {
825826
createAllExitPaths();
826827
NumTransforms++;
828+
return true;
827829
}
830+
return false;
828831
}
829832

830833
private:
@@ -975,8 +978,6 @@ struct TransformDFA {
975978

976979
/// Transform each threading path to effectively jump thread the DFA.
977980
void createAllExitPaths() {
978-
DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
979-
980981
// Move the switch block to the end of the path, since it will be duplicated
981982
BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
982983
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
@@ -993,15 +994,18 @@ struct TransformDFA {
993994
SmallPtrSet<BasicBlock *, 16> BlocksToClean;
994995
BlocksToClean.insert_range(successors(SwitchBlock));
995996

996-
for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
997-
createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
998-
NumPaths++;
999-
}
997+
{
998+
DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
999+
for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
1000+
createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
1001+
NumPaths++;
1002+
}
10001003

1001-
// After all paths are cloned, now update the last successor of the cloned
1002-
// path so it skips over the switch statement
1003-
for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
1004-
updateLastSuccessor(TPath, DuplicateMap, &DTU);
1004+
// After all paths are cloned, now update the last successor of the cloned
1005+
// path so it skips over the switch statement
1006+
for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
1007+
updateLastSuccessor(TPath, DuplicateMap, &DTU);
1008+
}
10051009

10061010
// For each instruction that was cloned and used outside, update its uses
10071011
updateSSA(NewDefs);
@@ -1426,9 +1430,8 @@ bool DFAJumpThreading::run(Function &F) {
14261430

14271431
for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
14281432
TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1429-
Transform.run();
1430-
MadeChanges = true;
1431-
LoopInfoBroken = true;
1433+
if (Transform.run())
1434+
MadeChanges = LoopInfoBroken = true;
14321435
}
14331436

14341437
#ifdef EXPENSIVE_CHECKS

0 commit comments

Comments
 (0)