@@ -549,6 +549,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
549549 assert (Itr != RedVarMap.end () && " Metadata not found" );
550550
551551 const CodeGenModule::XteamRedVarInfo &RVI = Itr->second ;
552+ llvm::Type *RedVarType = ConvertTypeForMem (XteamVD->getType ());
552553
553554 assert (RVI.ArgPos + 1 < Args->size () && " Arg position beyond bounds" );
554555
@@ -568,9 +569,9 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
568569 // {
569570 // RedVar += TeamVals[TeamID - 1]
570571 // }
571-
572572 Address ScanStorageValGEP = Address (
573- Builder.CreateGEP (Int32Ty, DScanStorage, GlobalGpuThreadId), Int32Ty,
573+ Builder.CreateGEP (RedVarType, DScanStorage, GlobalGpuThreadId),
574+ RedVarType,
574575 getContext ().getTypeAlignInChars (
575576 XteamVD->getType ())); // Storage[GlobalTID]
576577 Builder.CreateStore (Builder.CreateLoad (ScanStorageValGEP),
@@ -586,10 +587,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
586587 EmitBlock (IsAfterFirstTeamThenBlock);
587588 Address PrevTeamValGEP =
588589 Address (Builder.CreateGEP (
589- Int32Ty , DTeamVals,
590+ RedVarType , DTeamVals,
590591 Builder.CreateSub (WorkGroupId,
591592 llvm::ConstantInt::get (Int32Ty, 1 ))),
592- Int32Ty ,
593+ RedVarType ,
593594 getContext ().getTypeAlignInChars (
594595 XteamVD->getType ())); // TeamVals[TeamID - 1]
595596 Builder.CreateStore (Builder.CreateAdd (Builder.CreateLoad (RVI.RedVarAddr ),
@@ -614,7 +615,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
614615 // }
615616 // }
616617
617- Builder.CreateStore (llvm::ConstantInt::get (Int32Ty , 0 ),
618+ Builder.CreateStore (llvm::ConstantInt::get (RedVarType , 0 ),
618619 RVI.RedVarAddr ); // RedVar = 0
619620 llvm::Value *IsNotFirstThread = Builder.CreateICmpUGE (
620621 GlobalGpuThreadId,
@@ -630,8 +631,8 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
630631 GlobalGpuThreadId,
631632 llvm::ConstantInt::get (Int32Ty, 1 )); // GlobalTID - 1
632633 Address ScanStoragePrevValGEP = Address (
633- Builder.CreateGEP (Int32Ty , DScanStorage, PrevGlobalGpuThreadId),
634- Int32Ty ,
634+ Builder.CreateGEP (RedVarType , DScanStorage, PrevGlobalGpuThreadId),
635+ RedVarType ,
635636 getContext ().getTypeAlignInChars (
636637 XteamVD->getType ())); // Storage[GlobalTID - 1]
637638 Builder.CreateStore (Builder.CreateLoad (ScanStoragePrevValGEP),
@@ -656,10 +657,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
656657 EmitBlock (IsNotFirstThreadInTeamThenBlock);
657658 Address PrevTeamValGEP =
658659 Address (Builder.CreateGEP (
659- Int32Ty , DTeamVals,
660+ RedVarType , DTeamVals,
660661 Builder.CreateSub (WorkGroupId,
661662 llvm::ConstantInt::get (Int32Ty, 1 ))),
662- Int32Ty ,
663+ RedVarType ,
663664 getContext ().getTypeAlignInChars (
664665 XteamVD->getType ())); // TeamVals[TeamID - 1]
665666 Builder.CreateStore (Builder.CreateAdd (Builder.CreateLoad (RVI.RedVarAddr ),
@@ -676,10 +677,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
676677 EmitBlock (IsAfterSecondTeamThenBlock);
677678 Address PrevPrevTeamValGEP =
678679 Address (Builder.CreateGEP (
679- Int32Ty , DTeamVals,
680+ RedVarType , DTeamVals,
680681 Builder.CreateSub (WorkGroupId,
681682 llvm::ConstantInt::get (Int32Ty, 2 ))),
682- Int32Ty ,
683+ RedVarType ,
683684 getContext ().getTypeAlignInChars (
684685 XteamVD->getType ())); // TeamVals[TeamID - 2]
685686 Builder.CreateStore (
@@ -2307,12 +2308,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
23072308
23082309 llvm::Value *SegmentLoopUB = nullptr ;
23092310 llvm::Value *DSegmentVals = nullptr ;
2310- llvm::Value *ThreadLevelRes = nullptr ;
23112311 llvm::Value *GlobalUpperBound = nullptr ;
23122312 const Address *RedVarAddr = nullptr ;
23132313 llvm::BasicBlock *ExecBB = nullptr ;
23142314 llvm::BasicBlock *DoneBB = nullptr ;
2315- clang::QualType RedVarType;
2315+ const clang::VarDecl *XteamVD;
2316+ llvm::Type *RedVarType;
23162317 if (getLangOpts ().OpenMPIsTargetDevice && CGM.isXteamSegmentedScanKernel ()) {
23172318 // Compute Loop trip-count (N) = GlobalUB - GlobalLB + 1
23182319 const auto UBLValue = EmitLValue (
@@ -2368,19 +2369,19 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
23682369 Builder.CreateMul (SegmentSizeForScan, GlobalGpuThreadId),
23692370 BigJumpLoopIvAddr); // *iv = GlobalTID * Seg_Size
23702371
2371- // Every thread loops till just before the SegmentLoopUB = (GlobaTID + 1) *
2372- // Seg_Size
2372+ // Every thread loops till just before the SegmentLoopUB:
2373+ // SegmentLoopUB = (GlobalTID + 1) * Seg_Size
23732374 SegmentLoopUB = Builder.CreateMul (
23742375 SegmentSizeForScan,
23752376 Builder.CreateAdd (GlobalGpuThreadId,
23762377 llvm::ConstantInt::get (Int32Ty, 1 )));
23772378
2378- auto XteamVD = *(CGM.getXteamOrderedRedVar (&S).begin ());
2379+ XteamVD = *(CGM.getXteamOrderedRedVar (&S).begin ());
2380+ RedVarType = ConvertTypeForMem (XteamVD->getType ());
23792381 const CodeGenModule::XteamRedVarMap &RedVarMap = CGM.getXteamRedVarMap (&S);
23802382 const CodeGenModule::XteamRedVarInfo &RVI =
23812383 (RedVarMap.find (XteamVD))->second ;
23822384 RedVarAddr = &(RVI.RedVarAddr );
2383- RedVarType = XteamVD->getType ();
23842385
23852386 // SegmentValsAddr points to the SegmentVals array which will store the
23862387 // intermediate scan results computed per segment by a single thread
@@ -2520,11 +2521,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
25202521 if (!CGM.isXteamScanPhaseOne ) {
25212522 // SegmentVals contains the final scanned results computed for every
25222523 // element in a segment.
2523- Address SegmentValsGEP = Address (
2524- Builder.CreateGEP (Int32Ty, DSegmentVals,
2525- Builder.CreateLoad (BigJumpLoopIvAddr)),
2526- Int32Ty,
2527- getContext ().getTypeAlignInChars (RedVarType)); // SegmentVals[*iv]
2524+ Address SegmentValsGEP =
2525+ Address (Builder.CreateGEP (RedVarType, DSegmentVals,
2526+ Builder.CreateLoad (BigJumpLoopIvAddr)),
2527+ RedVarType,
2528+ getContext ().getTypeAlignInChars (
2529+ XteamVD->getType ())); // SegmentVals[*iv]
25282530 // emit redvar = SegmentVals[omp.iv]
25292531 Builder.CreateStore (Builder.CreateLoad (SegmentValsGEP), *RedVarAddr);
25302532 }
@@ -2548,11 +2550,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
25482550 (CGM.isXteamRedKernel (&S) || CGM.isBigJumpLoopKernel (&S))) {
25492551 if (CGM.isXteamSegmentedScanKernel ()) {
25502552 EmitBlock (Continue.getBlock ());
2551- Address SegmentValsGEP = Address (
2552- Builder.CreateGEP (Int32Ty, DSegmentVals,
2553- Builder.CreateLoad (BigJumpLoopIvAddr)),
2554- Int32Ty,
2555- getContext ().getTypeAlignInChars (RedVarType)); // Segment_Vals[*iv]
2553+ Address SegmentValsGEP =
2554+ Address (Builder.CreateGEP (RedVarType, DSegmentVals,
2555+ Builder.CreateLoad (BigJumpLoopIvAddr)),
2556+ RedVarType,
2557+ getContext ().getTypeAlignInChars (
2558+ XteamVD->getType ())); // Segment_Vals[*iv]
25562559 Builder.CreateStore (Builder.CreateLoad (*RedVarAddr),
25572560 SegmentValsGEP); // Segment_Vals[*iv] = red_var
25582561 llvm::Value *SegmentScanLoopInc =
0 commit comments