@@ -2184,6 +2184,50 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f
21842184
21852185} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
21862186
// PrefetchLoc: immediate transform that turns the $loc operand of a prefetch
// pattern into a value positioned in the cache-policy scope field.
// The low SCOPE_MASK bits of the input are subtracted from SCOPE_MASK (so the
// ordering is reversed: the largest input maps to 0) and the result is shifted
// left by SCOPE_SHIFT. The result is returned as an i32 target constant.
// NOTE(review): presumably the input is the locality hint of the generic
// prefetch node, with larger values meaning "closer" caches - confirm.
2187+ def PrefetchLoc: SDNodeXForm<timm, [{
2188+ uint32_t V = N->getZExtValue();
2189+ V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) << AMDGPU::CPol::SCOPE_SHIFT;
// On subtargets that do not report hasSafeCUPrefetch(), raise the value to at
// least SCOPE_SE so that the smallest (CU) scope is never emitted.
2190+ if (!Subtarget->hasSafeCUPrefetch())
2191+ V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
2192+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
2193+ }]>;
2194+
// prefetch_flat: pattern fragment over the `prefetch` node with operands
// ($ptr, $rw, $loc, $type) that only matches when the memory access is in
// AMDGPUAS::FLAT_ADDRESS. The SelectionDAG predicate reads the address space
// from the MemSDNode; the GlobalISel predicate reads it from the first memory
// operand of the instruction.
// NOTE(review): the GlobalISel predicate dereferences memoperands_begin()
// without checking for an empty list - assumes every prefetch reaching here
// carries at least one memory operand; confirm.
2195+ def prefetch_flat : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2196+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2197+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]> {
2198+ let GISelPredicateCode = [{
2199+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
2200+ }];
2201+ }
2202+
// prefetch_global: pattern fragment over the `prefetch` node with operands
// ($ptr, $rw, $loc, $type) that matches when either
//   * the access is in AMDGPUAS::GLOBAL_ADDRESS, or
//   * the access is in AMDGPUAS::CONSTANT_ADDRESS and the subtarget does not
//     report hasSafeSmemPrefetch().
// The SelectionDAG and GlobalISel predicates express the same condition; the
// former reads the address space from the MemSDNode, the latter from the first
// memory operand of the instruction.
// NOTE(review): presumably constant-address prefetches are handled by a scalar
// prefetch pattern elsewhere when hasSafeSmemPrefetch() is true - confirm.
// NOTE(review): memoperands_begin() is dereferenced without an emptiness
// check - assumes a memory operand is always present; confirm.
2203+ def prefetch_global : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2204+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2205+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2206+ (cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2207+ !Subtarget->hasSafeSmemPrefetch()); }]> {
2208+ let GISelPredicateCode = [{
2209+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2210+ ((*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2211+ !Subtarget->hasSafeSmemPrefetch());
2212+ }];
2213+ }
2214+
// FlatPrefetchPats: selection patterns that lower a generic prefetch to a
// FLAT-encoded prefetch pseudo.
//   inst          - base name of the FLAT_Pseudo to emit; the "_SADDR" variant
//                   is derived by string concatenation.
//   prefetch_kind - address-space-filtered prefetch fragment to match
//                   (instantiated in this file with prefetch_flat and
//                   prefetch_global).
//   rw            - operator matched in the $rw operand position
//                   (instantiated with i32imm_zero and i32imm_one).
// Both patterns require the fourth ($type) operand to match i32imm_one and
// pass $loc through PrefetchLoc to form the last instruction operand.
// NOTE(review): presumably $type == 1 selects data-cache prefetches as in the
// generic llvm.prefetch intrinsic - confirm.
2215+ multiclass FlatPrefetchPats<string inst, SDPatternOperator prefetch_kind, SDPatternOperator rw> {
// Form 1: address matched by GlobalOffset as a 64-bit VGPR pair plus an
// offset. AddedComplexity is 0 when rw is i32imm_zero, otherwise 25.
2216+ def : GCNPat <
2217+ (prefetch_kind (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2218+ (!cast<FLAT_Pseudo>(inst) $vaddr, $offset, (i32 (PrefetchLoc $loc)))
2219+ > {
2220+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 0, 25);
2221+ }
2222+
// Form 2: address matched by GlobalSAddr as a 64-bit SGPR base, a 32-bit VGPR
// offset and an offset; emits the "_SADDR" pseudo. AddedComplexity is 11 when
// rw is i32imm_zero, otherwise 30 - in both cases higher than form 1, so this
// form is preferred when both match.
2223+ def : GCNPat <
2224+ (prefetch_kind (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2225+ (!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, (i32 (PrefetchLoc $loc)))
2226+ > {
2227+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 11, 30);
2228+ }
2229+ }
2230+
21872231multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
21882232 def : GCNPat <
21892233 (intr (FlatOffset i64:$vaddr, i32:$offset), timm:$cpol),
@@ -2198,6 +2242,14 @@ multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
21982242}
21992243
22002244let SubtargetPredicate = HasVmemPrefInsts in {
2245+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_zero>;
2246+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_zero>;
2247+
2248+ // Patterns for forced vector prefetch with rw = 1.
2249+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_one>;
2250+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_one>;
2251+
2252+
22012253 // Patterns for target intrinsics
22022254 defm : FlatIntrPrefetchPats<"FLAT_PREFETCH_B8", int_amdgcn_flat_prefetch>;
22032255 defm : FlatIntrPrefetchPats<"GLOBAL_PREFETCH_B8", int_amdgcn_global_prefetch>;
0 commit comments