@@ -5213,7 +5213,8 @@ static SDValue PerformLoadCombine(SDNode *N,
52135213
52145214 SmallDenseMap<SDNode *, unsigned > ExtractElts;
52155215 SmallVector<SDNode *> ProxyRegs (OrigNumResults, nullptr );
5216- SmallVector<std::pair<SDNode *, unsigned /* offset*/ >> WorkList{{N, 0 }};
5216+ SmallVector<std::pair<SDNode *, unsigned >> WorkList{{N, {}}};
5217+ bool ProcessingInitialLoad = true ;
52175218 while (!WorkList.empty ()) {
52185219 auto [V, Offset] = WorkList.pop_back_val ();
52195220
@@ -5223,10 +5224,12 @@ static SDValue PerformLoadCombine(SDNode *N,
52235224 if (U.getValueType () == MVT::Other || U.getValueType () == MVT::Glue)
52245225 continue ; // we'll process chain/glue later
52255226
5227+ if (ProcessingInitialLoad)
5228+ Offset = U.getResNo ();
5229+
52265230 SDNode *User = U.getUser ();
52275231 if (User->getOpcode () == NVPTXISD::ProxyReg) {
5228- Offset = U.getResNo () * 2 ;
5229- SDNode *&ProxyReg = ProxyRegs[Offset / 2 ];
5232+ SDNode *&ProxyReg = ProxyRegs[Offset];
52305233
52315234 // We shouldn't have multiple proxy regs for the same value from the
52325235 // load, but bail out anyway since we don't handle this.
@@ -5238,13 +5241,13 @@ static SDValue PerformLoadCombine(SDNode *N,
52385241 User->getValueType (0 ) == MVT::v2f32 &&
52395242 U.getValueType () == MVT::i64 ) {
52405243 // match v2f32 = bitcast i64
5241- Offset = U. getResNo () * 2 ;
5244+ // continue and push the instruction
52425245 } else if (User->getOpcode () == ISD::EXTRACT_VECTOR_ELT &&
52435246 User->getValueType (0 ) == MVT::f32 ) {
52445247 // match f32 = extractelt v2f32
52455248 if (auto *CI = dyn_cast<ConstantSDNode>(User->getOperand (1 ))) {
52465249 unsigned Index = CI->getZExtValue ();
5247- ExtractElts[User] = Offset + Index;
5250+ ExtractElts[User] = 2 * Offset + Index;
52485251 continue ; // don't search
52495252 }
52505253 return SDValue (); // could not match
@@ -5254,6 +5257,9 @@ static SDValue PerformLoadCombine(SDNode *N,
52545257 // enqueue this to visit its uses
52555258 WorkList.push_back ({User, Offset});
52565259 }
5260+
5261+ // After we're done with the load, propagate the result offsets.
5262+ ProcessingInitialLoad = false ;
52575263 }
52585264
52595265 // (2) If the load's value is only used as f32 elements, replace all
0 commit comments