Skip to content

Commit 1d26b11

Browse files
committed
Merge branch 'hgh/libcxx/P2255R2-A_type_trait_to_detect_reference_binding_to_temporary' of https://github.com/H-G-Hristov/llvm-project into hgh/libcxx/P2255R2-A_type_trait_to_detect_reference_binding_to_temporary
2 parents 4edecb2 + 665d81b commit 1d26b11

File tree

5 files changed

+99
-27
lines changed

5 files changed

+99
-27
lines changed

llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,12 +600,12 @@ class MemLocFragmentFill {
600600
break;
601601
}
602602

603-
auto CurrentLiveInEntry = LiveIn.find(&BB);
604603
// If there's no LiveIn entry for the block yet, add it.
605-
if (CurrentLiveInEntry == LiveIn.end()) {
604+
auto [CurrentLiveInEntry, Inserted] = LiveIn.try_emplace(&BB);
605+
if (Inserted) {
606606
LLVM_DEBUG(dbgs() << "change=true (first) on meet on " << BB.getName()
607607
<< "\n");
608-
LiveIn[&BB] = std::move(BBLiveIn);
608+
CurrentLiveInEntry->second = std::move(BBLiveIn);
609609
return /*Changed=*/true;
610610
}
611611

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5427,8 +5427,8 @@ void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
54275427
--Iter;
54285428
MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
54295429
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5430-
if (PromoteRegs.find(i) != PromoteRegs.end())
5431-
MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
5430+
if (auto It = PromoteRegs.find(i); It != PromoteRegs.end())
5431+
MIBuilder.addReg(It->second, RegState::Kill);
54325432
else
54335433
Iter->addOperand(MI->getOperand(i));
54345434
}

llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,6 @@ void SPIRVModuleAnalysis::visitDecl(
319319
std::map<const Value *, unsigned> &GlobalToGReg, const MachineFunction *MF,
320320
const MachineInstr &MI) {
321321
unsigned Opcode = MI.getOpcode();
322-
DenseSet<Register> Deps;
323322

324323
// Process each operand of the instruction to resolve dependencies
325324
for (const MachineOperand &MO : MI.operands()) {

mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,14 +1830,14 @@ static void getMultiLevelStrides(const MemRefRegion &region,
18301830
}
18311831
}
18321832

1833-
/// Generates a point-wise copy from/to `memref' to/from `fastMemRef' and
1834-
/// returns the outermost AffineForOp of the copy loop nest. `lbMaps` and
1835-
/// `ubMaps` along with `lbOperands` and `ubOperands` hold the lower and upper
1836-
/// bound information for the copy loop nest. `fastBufOffsets` contain the
1837-
/// expressions to be subtracted out from the respective copy loop iterators in
1838-
/// order to index the fast buffer. If `copyOut' is true, generates a copy-out;
1839-
/// otherwise a copy-in. Builder `b` should be set to the point the copy nest is
1840-
/// inserted.
1833+
/// Generates a point-wise copy from/to a non-zero ranked `memref` to/from
1834+
/// `fastMemRef` and returns the outermost AffineForOp of the copy loop nest.
1835+
/// `lbMaps` and `ubMaps` along with `lbOperands` and `ubOperands` hold the
1836+
/// lower and upper bound information for the copy loop nest. `fastBufOffsets`
1837+
/// contain the expressions to be subtracted out from the respective copy loop
1838+
/// iterators in order to index the fast buffer. If `copyOut` is true, generates
1839+
/// a copy-out; otherwise a copy-in. Builder `b` should be set to the point the
1840+
/// copy nest is inserted.
18411841
///
18421842
/// The copy-in nest is generated as follows as an example for a 2-d region:
18431843
/// for x = ...
@@ -1858,6 +1858,8 @@ generatePointWiseCopy(Location loc, Value memref, Value fastMemRef,
18581858
}));
18591859

18601860
unsigned rank = cast<MemRefType>(memref.getType()).getRank();
1861+
// A copy nest can't be generated for 0-ranked memrefs.
1862+
assert(rank != 0 && "non-zero rank memref expected");
18611863
assert(lbMaps.size() == rank && "wrong number of lb maps");
18621864
assert(ubMaps.size() == rank && "wrong number of ub maps");
18631865

@@ -1921,19 +1923,20 @@ emitRemarkForBlock(Block &block) {
19211923
return block.getParentOp()->emitRemark();
19221924
}
19231925

1924-
/// Creates a buffer in the faster memory space for the specified memref region;
1925-
/// generates a copy from the lower memory space to this one, and replaces all
1926-
/// loads/stores in the block range [`begin', `end') of `block' to load/store
1927-
/// from that buffer. Returns failure if copies could not be generated due to
1928-
/// yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart`
1929-
/// in copyPlacementBlock specify the insertion points where the incoming copies
1930-
/// and outgoing copies, respectively, should be inserted (the insertion happens
1931-
/// right before the insertion point). Since `begin` can itself be invalidated
1932-
/// due to the memref rewriting done from this method, the output argument
1933-
/// `nBegin` is set to its replacement (set to `begin` if no invalidation
1934-
/// happens). Since outgoing copies could have been inserted at `end`, the
1935-
/// output argument `nEnd` is set to the new end. `sizeInBytes` is set to the
1936-
/// size of the fast buffer allocated.
1926+
/// Creates a buffer in the faster memory space for the specified memref region
1927+
/// (memref has to be non-zero ranked); generates a copy from the lower memory
1928+
/// space to this one, and replaces all loads/stores in the block range
1929+
/// [`begin`, `end`) of `block` to load/store from that buffer. Returns failure
1930+
/// if copies could not be generated due to yet unimplemented cases.
1931+
/// `copyInPlacementStart` and `copyOutPlacementStart` in copyPlacementBlock
1932+
/// specify the insertion points where the incoming copies and outgoing copies,
1933+
/// respectively, should be inserted (the insertion happens right before the
1934+
/// insertion point). Since `begin` can itself be invalidated due to the memref
1935+
/// rewriting done from this method, the output argument `nBegin` is set to its
1936+
/// replacement (set to `begin` if no invalidation happens). Since outgoing
1937+
/// copies could have been inserted at `end`, the output argument `nEnd` is set
1938+
/// to the new end. `sizeInBytes` is set to the size of the fast buffer
1939+
/// allocated.
19371940
static LogicalResult generateCopy(
19381941
const MemRefRegion &region, Block *block, Block::iterator begin,
19391942
Block::iterator end, Block *copyPlacementBlock,
@@ -1984,6 +1987,11 @@ static LogicalResult generateCopy(
19841987
SmallVector<Value, 4> bufIndices;
19851988

19861989
unsigned rank = memRefType.getRank();
1990+
if (rank == 0) {
1991+
LLVM_DEBUG(llvm::dbgs() << "Non-zero ranked memrefs supported\n");
1992+
return failure();
1993+
}
1994+
19871995
SmallVector<int64_t, 4> fastBufferShape;
19881996

19891997
// Compute the extents of the buffer.

mlir/test/Dialect/Affine/affine-data-copy.mlir

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,68 @@ func.func @arbitrary_memory_space() {
354354
}
355355
return
356356
}
357+
358+
// CHECK-LABEL: zero_ranked
359+
func.func @zero_ranked(%3:memref<480xi1>) {
360+
%false = arith.constant false
361+
%4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
362+
affine.store %false, %4[] : memref<i1>
363+
%5 = memref.alloc() {alignment = 128 : i64} : memref<i1>
364+
memref.copy %4, %5 : memref<i1> to memref<i1>
365+
affine.for %arg0 = 0 to 480 {
366+
%11 = affine.load %3[%arg0] : memref<480xi1>
367+
%12 = affine.load %5[] : memref<i1>
368+
%13 = arith.cmpi slt, %11, %12 : i1
369+
%14 = arith.select %13, %11, %12 : i1
370+
affine.store %14, %5[] : memref<i1>
371+
}
372+
return
373+
}
374+
375+
// CHECK-LABEL: func @scalar_memref_copy_without_dma
376+
func.func @scalar_memref_copy_without_dma() {
377+
%false = arith.constant false
378+
%4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
379+
affine.store %false, %4[] : memref<i1>
380+
381+
// CHECK: %[[FALSE:.*]] = arith.constant false
382+
// CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
383+
// CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref<i1>
384+
return
385+
}
386+
387+
// CHECK-LABEL: func @scalar_memref_copy_in_loop
388+
func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) {
389+
%false = arith.constant false
390+
%4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
391+
affine.store %false, %4[] : memref<i1>
392+
%5 = memref.alloc() {alignment = 128 : i64} : memref<i1>
393+
memref.copy %4, %5 : memref<i1> to memref<i1>
394+
affine.for %arg0 = 0 to 480 {
395+
%11 = affine.load %3[%arg0] : memref<480xi1>
396+
%12 = affine.load %5[] : memref<i1>
397+
%13 = arith.cmpi slt, %11, %12 : i1
398+
%14 = arith.select %13, %11, %12 : i1
399+
affine.store %14, %5[] : memref<i1>
400+
}
401+
402+
// CHECK: %[[FALSE:.*]] = arith.constant false
403+
// CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
404+
// CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref<i1>
405+
// CHECK: %[[TARGET:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
406+
// CHECK: memref.copy %alloc, %[[TARGET]] : memref<i1> to memref<i1>
407+
// CHECK: %[[FAST_MEMREF:.*]] = memref.alloc() : memref<480xi1>
408+
// CHECK: affine.for %{{.*}} = 0 to 480 {
409+
// CHECK: %{{.*}} = affine.load %arg0[%{{.*}}] : memref<480xi1>
410+
// CHECK: affine.store %{{.*}}, %[[FAST_MEMREF]][%{{.*}}] : memref<480xi1>
411+
// CHECK: }
412+
// CHECK: affine.for %arg1 = 0 to 480 {
413+
// CHECK: %[[L0:.*]] = affine.load %[[FAST_MEMREF]][%arg1] : memref<480xi1>
414+
// CHECK: %[[L1:.*]] = affine.load %[[TARGET]][] : memref<i1>
415+
// CHECK: %[[CMPI:.*]] = arith.cmpi slt, %[[L0]], %[[L1]] : i1
416+
// CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[L0]], %[[L1]] : i1
417+
// CHECK: affine.store %[[SELECT]], %[[TARGET]][] : memref<i1>
418+
// CHECK: }
419+
// CHECK: memref.dealloc %[[FAST_MEMREF]] : memref<480xi1>
420+
return
421+
}

0 commit comments

Comments
 (0)