|
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -338,6 +339,10 @@ class Vectorizer { |
338 | 339 | /// Postcondition: For all i, ret[i][0].second == 0, because the first instr |
339 | 340 | /// in the chain is the leader, and an instr touches distance 0 from itself. |
340 | 341 | std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs); |
| 342 | + |
| 343 | + /// Attempts to prepare atomic read-modify-write instructions for |
| 344 | + /// vectorization. |
| 345 | + bool prepareAtomicInstOps(AtomicRMWInst *); |
341 | 346 | }; |
342 | 347 |
|
343 | 348 | class LoadStoreVectorizerLegacyPass : public FunctionPass { |
@@ -419,8 +424,94 @@ PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, |
419 | 424 | return Changed ? PA : PreservedAnalyses::all(); |
420 | 425 | } |
421 | 426 |
|
| 427 | +/** |
| 428 | + * @brief Prepare operands of atomicrmw instructions for vectorization. |
| 429 | + * |
| 430 | + * Ensures the given AtomicRMWInst's pointer and value operands meet type |
| 431 | + * requirements and are load instructions. Inserts necessary bitcast and |
| 432 | + * inttoptr instructions. |
| 433 | + * |
| 434 | + * @param AI Pointer to the AtomicRMWInst in question. |
| 435 | + * @return true if the operands were successfully prepared, false otherwise. |
| 436 | + */ |
| 437 | +bool Vectorizer::prepareAtomicInstOps(AtomicRMWInst *AI) { |
| 438 | + if (AI->isVolatile() || AI->hasMetadata("amdgpu.no.fine.grained.memory")) |
| 439 | + return false; |
| 440 | + |
| 441 | + auto *Ptr = AI->getPointerOperand(); |
| 442 | + auto *PtrTy = Ptr->getType(); |
| 443 | + auto *Val = AI->getValOperand(); |
| 444 | + auto *ValTy = Val->getType(); |
| 445 | + |
| 446 | + if (!PtrTy->isPointerTy()) |
| 447 | + return false; |
| 448 | + |
| 449 | + // Only cast if the value operand type is |
| 450 | + // <2 x half>, <2 x i16>, <4 x i8>, f32, or i32 |
| 451 | + bool ValTyIsOkay = false; |
| 452 | + if (auto *VTy = dyn_cast<FixedVectorType>(ValTy)) { |
| 453 | + if (VTy->getNumElements() == 2) { |
| 454 | + if (VTy->getElementType()->isHalfTy()) |
| 455 | + ValTyIsOkay = true; |
| 456 | + if (VTy->getElementType()->isIntegerTy(16)) |
| 457 | + ValTyIsOkay = true; |
| 458 | + } else if (VTy->getNumElements() == 4 && |
| 459 | + VTy->getElementType()->isIntegerTy(8)) { |
| 460 | + ValTyIsOkay = true; |
| 461 | + } |
| 462 | + } else { |
| 463 | + if (ValTy->isFloatTy()) |
| 464 | + ValTyIsOkay = true; |
| 465 | + if (ValTy->isIntegerTy(32)) |
| 466 | + ValTyIsOkay = true; |
| 467 | + } |
| 468 | + if (!ValTyIsOkay) |
| 469 | + return false; |
| 470 | + |
| 471 | + // Walk up the chain of instructions to find the load instruction |
| 472 | + auto GetLoadInst = [](Value *Ptr) -> LoadInst * { |
| 473 | + while (Ptr) { |
| 474 | + if (!isa<Instruction>(Ptr)) |
| 475 | + return nullptr; |
| 476 | + if (auto *LI = dyn_cast<LoadInst>(Ptr)) |
| 477 | + return LI; |
| 478 | + if (isa<GetElementPtrInst>(Ptr)) |
| 479 | + return nullptr; |
| 480 | + if (auto *PtrInst = dyn_cast<Instruction>(Ptr)) |
| 481 | + Ptr = PtrInst->getOperand(0); |
| 482 | + else |
| 483 | + return nullptr; |
| 484 | + } |
| 485 | + return nullptr; |
| 486 | + }; |
| 487 | + |
| 488 | + // Pointer and value operands must be load instructions to be vectorized |
| 489 | + auto *ValLoadInst = GetLoadInst(Val); |
| 490 | + auto *PtrLoadInst = GetLoadInst(Ptr); |
| 491 | + if (!ValLoadInst || !PtrLoadInst) |
| 492 | + return false; |
| 493 | + |
| 494 | + // Insert bitcast to replace atomicrmw value operand |
| 495 | + IRBuilder<> Builder(AI->getParent(), AI->getIterator()); |
| 496 | + ValLoadInst->mutateType(IntegerType::getInt32Ty(AI->getContext())); |
| 497 | + AI->setOperand(1, Builder.CreateBitCast(ValLoadInst, ValTy, |
| 498 | + ValLoadInst->getName() + ".bitcast")); |
| 499 | + |
| 500 | + // Insert inttoptr to replace atomicrmw pointer operand |
| 501 | + PtrLoadInst->mutateType(IntegerType::getInt32Ty(AI->getContext())); |
| 502 | + AI->setOperand(0, |
| 503 | + Builder.CreateIntToPtr(PtrLoadInst, PtrTy, |
| 504 | + PtrLoadInst->getName() + ".inttoptr")); |
| 505 | + return true; |
| 506 | +} |
| 507 | + |
422 | 508 | bool Vectorizer::run() { |
423 | 509 | bool Changed = false; |
| 510 | + |
| 511 | + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) |
| 512 | + if (auto *AI = dyn_cast<AtomicRMWInst>(&*I)) |
| 513 | + Changed |= prepareAtomicInstOps(AI); |
| 514 | + |
424 | 515 | // Break up the BB if there are any instrs which aren't guaranteed to transfer |
425 | 516 | // execution to their successor. |
426 | 517 | // |
|
0 commit comments