Skip to content

Commit 5eda221

Browse files
authored
Merge pull request #80 from AMD-Lightning-Internal/amd/dev/animkuma/xteam-scan-type-generic
[OpenMP][Clang][DeviceRTL] Support Multiple Datatypes for Xteam Scan
2 parents 03c4267 + 755f808 commit 5eda221

File tree

8 files changed

+3957
-55
lines changed

8 files changed

+3957
-55
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 283 additions & 22 deletions
Large diffs are not rendered by default.

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
549549
assert(Itr != RedVarMap.end() && "Metadata not found");
550550

551551
const CodeGenModule::XteamRedVarInfo &RVI = Itr->second;
552+
llvm::Type *RedVarType = ConvertTypeForMem(XteamVD->getType());
552553

553554
assert(RVI.ArgPos + 1 < Args->size() && "Arg position beyond bounds");
554555

@@ -568,9 +569,9 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
568569
// {
569570
// RedVar += TeamVals[TeamID - 1]
570571
// }
571-
572572
Address ScanStorageValGEP = Address(
573-
Builder.CreateGEP(Int32Ty, DScanStorage, GlobalGpuThreadId), Int32Ty,
573+
Builder.CreateGEP(RedVarType, DScanStorage, GlobalGpuThreadId),
574+
RedVarType,
574575
getContext().getTypeAlignInChars(
575576
XteamVD->getType())); // Storage[GlobalTID]
576577
Builder.CreateStore(Builder.CreateLoad(ScanStorageValGEP),
@@ -586,10 +587,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
586587
EmitBlock(IsAfterFirstTeamThenBlock);
587588
Address PrevTeamValGEP =
588589
Address(Builder.CreateGEP(
589-
Int32Ty, DTeamVals,
590+
RedVarType, DTeamVals,
590591
Builder.CreateSub(WorkGroupId,
591592
llvm::ConstantInt::get(Int32Ty, 1))),
592-
Int32Ty,
593+
RedVarType,
593594
getContext().getTypeAlignInChars(
594595
XteamVD->getType())); // TeamVals[TeamID - 1]
595596
Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(RVI.RedVarAddr),
@@ -614,7 +615,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
614615
// }
615616
// }
616617

617-
Builder.CreateStore(llvm::ConstantInt::get(Int32Ty, 0),
618+
Builder.CreateStore(llvm::ConstantInt::get(RedVarType, 0),
618619
RVI.RedVarAddr); // RedVar = 0
619620
llvm::Value *IsNotFirstThread = Builder.CreateICmpUGE(
620621
GlobalGpuThreadId,
@@ -630,8 +631,8 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
630631
GlobalGpuThreadId,
631632
llvm::ConstantInt::get(Int32Ty, 1)); // GlobalTID - 1
632633
Address ScanStoragePrevValGEP = Address(
633-
Builder.CreateGEP(Int32Ty, DScanStorage, PrevGlobalGpuThreadId),
634-
Int32Ty,
634+
Builder.CreateGEP(RedVarType, DScanStorage, PrevGlobalGpuThreadId),
635+
RedVarType,
635636
getContext().getTypeAlignInChars(
636637
XteamVD->getType())); // Storage[GlobalTID - 1]
637638
Builder.CreateStore(Builder.CreateLoad(ScanStoragePrevValGEP),
@@ -656,10 +657,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
656657
EmitBlock(IsNotFirstThreadInTeamThenBlock);
657658
Address PrevTeamValGEP =
658659
Address(Builder.CreateGEP(
659-
Int32Ty, DTeamVals,
660+
RedVarType, DTeamVals,
660661
Builder.CreateSub(WorkGroupId,
661662
llvm::ConstantInt::get(Int32Ty, 1))),
662-
Int32Ty,
663+
RedVarType,
663664
getContext().getTypeAlignInChars(
664665
XteamVD->getType())); // TeamVals[TeamID - 1]
665666
Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(RVI.RedVarAddr),
@@ -676,10 +677,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
676677
EmitBlock(IsAfterSecondTeamThenBlock);
677678
Address PrevPrevTeamValGEP =
678679
Address(Builder.CreateGEP(
679-
Int32Ty, DTeamVals,
680+
RedVarType, DTeamVals,
680681
Builder.CreateSub(WorkGroupId,
681682
llvm::ConstantInt::get(Int32Ty, 2))),
682-
Int32Ty,
683+
RedVarType,
683684
getContext().getTypeAlignInChars(
684685
XteamVD->getType())); // TeamVals[TeamID - 2]
685686
Builder.CreateStore(
@@ -2307,12 +2308,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
23072308

23082309
llvm::Value *SegmentLoopUB = nullptr;
23092310
llvm::Value *DSegmentVals = nullptr;
2310-
llvm::Value *ThreadLevelRes = nullptr;
23112311
llvm::Value *GlobalUpperBound = nullptr;
23122312
const Address *RedVarAddr = nullptr;
23132313
llvm::BasicBlock *ExecBB = nullptr;
23142314
llvm::BasicBlock *DoneBB = nullptr;
2315-
clang::QualType RedVarType;
2315+
const clang::VarDecl *XteamVD;
2316+
llvm::Type *RedVarType;
23162317
if (getLangOpts().OpenMPIsTargetDevice && CGM.isXteamSegmentedScanKernel()) {
23172318
// Compute Loop trip-count (N) = GlobalUB - GlobalLB + 1
23182319
const auto UBLValue = EmitLValue(
@@ -2368,19 +2369,19 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
23682369
Builder.CreateMul(SegmentSizeForScan, GlobalGpuThreadId),
23692370
BigJumpLoopIvAddr); // *iv = GlobalTID * Seg_Size
23702371

2371-
// Every thread loops till just before the SegmentLoopUB = (GlobaTID + 1) *
2372-
// Seg_Size
2372+
// Every thread loops till just before the SegmentLoopUB:
2373+
// SegmentLoopUB = (GlobalTID + 1) * Seg_Size
23732374
SegmentLoopUB = Builder.CreateMul(
23742375
SegmentSizeForScan,
23752376
Builder.CreateAdd(GlobalGpuThreadId,
23762377
llvm::ConstantInt::get(Int32Ty, 1)));
23772378

2378-
auto XteamVD = *(CGM.getXteamOrderedRedVar(&S).begin());
2379+
XteamVD = *(CGM.getXteamOrderedRedVar(&S).begin());
2380+
RedVarType = ConvertTypeForMem(XteamVD->getType());
23792381
const CodeGenModule::XteamRedVarMap &RedVarMap = CGM.getXteamRedVarMap(&S);
23802382
const CodeGenModule::XteamRedVarInfo &RVI =
23812383
(RedVarMap.find(XteamVD))->second;
23822384
RedVarAddr = &(RVI.RedVarAddr);
2383-
RedVarType = XteamVD->getType();
23842385

23852386
// SegmentValsAddr points to the SegmentVals array which will store the
23862387
// intermediate scan results computed per segment by a single thread
@@ -2520,11 +2521,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
25202521
if (!CGM.isXteamScanPhaseOne) {
25212522
// SegmentVals contains the final scanned results computed for every
25222523
// element in a segment.
2523-
Address SegmentValsGEP = Address(
2524-
Builder.CreateGEP(Int32Ty, DSegmentVals,
2525-
Builder.CreateLoad(BigJumpLoopIvAddr)),
2526-
Int32Ty,
2527-
getContext().getTypeAlignInChars(RedVarType)); // SegmentVals[*iv]
2524+
Address SegmentValsGEP =
2525+
Address(Builder.CreateGEP(RedVarType, DSegmentVals,
2526+
Builder.CreateLoad(BigJumpLoopIvAddr)),
2527+
RedVarType,
2528+
getContext().getTypeAlignInChars(
2529+
XteamVD->getType())); // SegmentVals[*iv]
25282530
// emit redvar = SegmentVals[omp.iv]
25292531
Builder.CreateStore(Builder.CreateLoad(SegmentValsGEP), *RedVarAddr);
25302532
}
@@ -2548,11 +2550,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
25482550
(CGM.isXteamRedKernel(&S) || CGM.isBigJumpLoopKernel(&S))) {
25492551
if (CGM.isXteamSegmentedScanKernel()) {
25502552
EmitBlock(Continue.getBlock());
2551-
Address SegmentValsGEP = Address(
2552-
Builder.CreateGEP(Int32Ty, DSegmentVals,
2553-
Builder.CreateLoad(BigJumpLoopIvAddr)),
2554-
Int32Ty,
2555-
getContext().getTypeAlignInChars(RedVarType)); // Segment_Vals[*iv]
2553+
Address SegmentValsGEP =
2554+
Address(Builder.CreateGEP(RedVarType, DSegmentVals,
2555+
Builder.CreateLoad(BigJumpLoopIvAddr)),
2556+
RedVarType,
2557+
getContext().getTypeAlignInChars(
2558+
XteamVD->getType())); // Segment_Vals[*iv]
25562559
Builder.CreateStore(Builder.CreateLoad(*RedVarAddr),
25572560
SegmentValsGEP); // Segment_Vals[*iv] = red_var
25582561
llvm::Value *SegmentScanLoopInc =

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -415,10 +415,10 @@ void CodeGenFunction::InitializeXteamRedCapturedVars(
415415
llvm::Value *DScanStorageInst =
416416
Builder.CreateAlloca(RedVarType, nullptr, "d_scan_storage");
417417
Address DScanStorageAddr(
418-
DScanStorageInst, Int32Ty,
418+
DScanStorageInst, RedVarType,
419419
Context.getTypeAlignInChars(Context.UnsignedIntTy));
420420
llvm::Value *NullPtrDScanStorage =
421-
llvm::ConstantPointerNull::get(Int32Ty->getPointerTo());
421+
llvm::ConstantPointerNull::get(RedVarType->getPointerTo());
422422
Builder.CreateStore(NullPtrDScanStorage, DScanStorageAddr);
423423

424424
assert(DScanStorageInst && "Device scan storage pointer cannot be null");
@@ -428,10 +428,10 @@ void CodeGenFunction::InitializeXteamRedCapturedVars(
428428
llvm::Value *DSegmentValsInst =
429429
Builder.CreateAlloca(RedVarType, nullptr, "d_segment_vals");
430430
Address DSegmentValsAddr(
431-
DSegmentValsInst, Int32Ty,
431+
DSegmentValsInst, RedVarType,
432432
Context.getTypeAlignInChars(Context.UnsignedIntTy));
433433
llvm::Value *NullPtrDSegmentVals =
434-
llvm::ConstantPointerNull::get(Int32Ty->getPointerTo());
434+
llvm::ConstantPointerNull::get(RedVarType->getPointerTo());
435435
Builder.CreateStore(NullPtrDSegmentVals, DSegmentValsAddr);
436436

437437
assert(DSegmentValsInst && "Segment Vals Array pointer cannot be null");

0 commit comments

Comments
 (0)