@@ -2174,6 +2174,50 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f
21742174
21752175} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
21762176
// Operand transform applied to the immediate $loc operand of a prefetch node
// (see its use in FlatPrefetchPats) to produce the i32 target constant passed
// as the last operand of the selected prefetch pseudo.
// NOTE(review): presumably the input is the llvm.prefetch locality hint and the
// output is a cache-policy scope field - confirm against the CPol definitions.
2177+ def PrefetchLoc: SDNodeXForm<timm, [{
2178+ uint32_t V = N->getZExtValue();
// Keep only the SCOPE_MASK bits of the input, invert them within the mask
// range (MASK - value) and shift the result into the scope field position.
2179+ V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) << AMDGPU::CPol::SCOPE_SHIFT;
// On subtargets without safe CU prefetch, raise the value to at least SCOPE_SE.
// NOTE(review): assumes scope encodings grow numerically from CU outwards so
// that std::max widens the scope - TODO confirm.
2180+ if (!Subtarget->hasSafeCUPrefetch())
2181+ V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
2182+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
2183+ }]>;
2184+
// Pattern fragment matching a `prefetch` node (operands: ptr, rw, loc, type)
// whose memory access is in the FLAT address space.
// The inline C++ predicate checks the address space of the MemSDNode; the
// GISelPredicateCode performs the same check on the instruction's first memory
// operand.
// NOTE(review): the GlobalISel check dereferences memoperands_begin() without
// testing for an empty list - presumably a prefetch always carries a memory
// operand; confirm.
2185+ def prefetch_flat : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2186+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2187+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]> {
2188+ let GISelPredicateCode = [{
2189+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
2190+ }];
2191+ }
2192+
// Pattern fragment matching a `prefetch` node (operands: ptr, rw, loc, type)
// whose memory access is either:
//   - in the GLOBAL address space, or
//   - in the CONSTANT address space on a subtarget for which
//     hasSafeSmemPrefetch() returns false.
// The inline C++ predicate checks the MemSDNode; the GISelPredicateCode applies
// the same condition to the instruction's first memory operand.
// NOTE(review): presumably constant-address prefetches are selected to a scalar
// (SMEM) prefetch elsewhere when that is safe, and fall back to this fragment
// otherwise - confirm against the SMEM prefetch patterns.
2193+ def prefetch_global : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2194+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2195+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2196+ (cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2197+ !Subtarget->hasSafeSmemPrefetch()); }]> {
2198+ let GISelPredicateCode = [{
2199+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2200+ ((*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2201+ !Subtarget->hasSafeSmemPrefetch());
2202+ }];
2203+ }
2204+
// Selection patterns that map a generic prefetch node onto a FLAT prefetch
// pseudo. Both patterns require the prefetch's fourth (type) operand to be
// i32imm_one and pass the $loc immediate through the PrefetchLoc transform.
//   inst          - base name of the FLAT_Pseudo to select; the second pattern
//                   selects the pseudo named inst#"_SADDR".
//   prefetch_kind - pattern fragment deciding which prefetch nodes match
//                   (e.g. prefetch_flat or prefetch_global).
//   rw            - pattern operand matched against the prefetch's rw operand;
//                   it also picks the AddedComplexity of each pattern.
// NOTE(review): type == 1 presumably denotes a data-cache prefetch - confirm
// against the llvm.prefetch definition.
2205+ multiclass FlatPrefetchPats<string inst, SDPatternOperator prefetch_kind, SDPatternOperator rw> {
// Vector-address form: the pointer is matched through GlobalOffset into a
// 64-bit VGPR base ($vaddr) plus an immediate $offset.
2206+ def : GCNPat <
2207+ (prefetch_kind (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2208+ (!cast<FLAT_Pseudo>(inst) $vaddr, $offset, (i32 (PrefetchLoc $loc)))
2209+ > {
// AddedComplexity is 0 when rw is i32imm_zero and 25 otherwise.
// NOTE(review): the larger value for rw != 0 presumably lets the forced vector
// prefetch outrank competing prefetch patterns - confirm.
2210+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 0, 25);
2211+ }
2212+
// Scalar-base form: the pointer is matched through GlobalSAddr into a 64-bit
// SGPR base ($saddr), a 32-bit VGPR offset ($voffset) and an immediate $offset.
2213+ def : GCNPat <
2214+ (prefetch_kind (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2215+ (!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, (i32 (PrefetchLoc $loc)))
2216+ > {
// AddedComplexity is 11 when rw is i32imm_zero and 30 otherwise; both exceed
// the corresponding values of the vector-address pattern above.
2217+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 11, 30);
2218+ }
2219+ }
2220+
21772221multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
21782222 def : GCNPat <
21792223 (intr (FlatOffset i64:$vaddr, i32:$offset), timm:$cpol),
@@ -2188,6 +2232,14 @@ multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
21882232}
21892233
21902234let SubtargetPredicate = HasVmemPrefInsts in {
2235+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_zero>;
2236+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_zero>;
2237+
2238+ // Patterns for forced vector prefetch with rw = 1.
2239+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_one>;
2240+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_one>;
2241+
2242+
21912243 // Patterns for target intrinsics
21922244 defm : FlatIntrPrefetchPats<"FLAT_PREFETCH_B8", int_amdgcn_flat_prefetch>;
21932245 defm : FlatIntrPrefetchPats<"GLOBAL_PREFETCH_B8", int_amdgcn_global_prefetch>;
0 commit comments