@@ -74,11 +74,11 @@ getMaybeLaneLayout(xegpu::TensorDescType tdescType) {
7474// / In this case, lane layout is transposed (from the usual [1, SG_SIZE] form)
7575// / indicating that this is a load that requires transpose effect. However,
7676// / lane data is [1, 2], meaning that each lane must grab 2 f16 elements from
77- // / the inner dimension. We convert this to a canonical form by converting the
77+ // / the inner dimension. We convert this to an optimized form by converting the
7878// / tensor_desc to i32 type such that lane data becomes [1, 1]. This makes the
7979// / later lowering easily use the load with transpose instruction.
80- static bool canBeCanonicalizedForTranspose (ArrayRef<int64_t > laneLayout,
81- ArrayRef<int64_t > laneData) {
80+ static bool canBeOptimizedForTranspose (ArrayRef<int64_t > laneLayout,
81+ ArrayRef<int64_t > laneData) {
8282 if (laneLayout.size () != 2 || laneData.size () != 2 )
8383 return false ;
8484 if (laneLayout[0 ] == 1 || laneLayout[1 ] != 1 )
@@ -90,7 +90,7 @@ static bool canBeCanonicalizedForTranspose(ArrayRef<int64_t> laneLayout,
9090
9191// / A tensor desc type can be optimized if its element type is less than 32 bits
9292// / and its layout can be optimized.
93- static bool canBeCanonicalizedForTranspose (xegpu::TensorDescType tdescType) {
93+ static bool canBeOptimizedForTranspose (xegpu::TensorDescType tdescType) {
9494 // If the dtype is greater than or equal to 32 bits, layout must be valid.
9595 int elementTyBitwidth = tdescType.getElementType ().getIntOrFloatBitWidth ();
9696 if (elementTyBitwidth >= 32 )
@@ -99,14 +99,14 @@ static bool canBeCanonicalizedForTranspose(xegpu::TensorDescType tdescType) {
9999 auto maybeLaneData = getMaybeLaneData (tdescType);
100100 if (!maybeLaneData || !maybeLaneLayout)
101101 return false ;
102- return canBeCanonicalizedForTranspose (*maybeLaneLayout, *maybeLaneData);
102+ return canBeOptimizedForTranspose (*maybeLaneLayout, *maybeLaneData);
103103}
104104
105105// / Check if a tensor desc type can be optimized for transpose, if so return the
106106// / new optimized tensor desc type with a valid transpose layout.
107107static xegpu::TensorDescType tryOptimize (xegpu::TensorDescType tdescType,
108108 const uArch *targetuArch) {
109- if (!canBeCanonicalizedForTranspose (tdescType))
109+ if (!canBeOptimizedForTranspose (tdescType))
110110 return tdescType;
111111 auto laneData = getMaybeLaneData (tdescType)
112112 .value (); // Lane data must exist if we reach here.
@@ -454,11 +454,11 @@ struct XeGPUOptimizeBlockLoadsPass final
454454 // converted.
455455 target.addDynamicallyLegalOp <xegpu::CreateNdDescOp>(
456456 [&](xegpu::CreateNdDescOp createNdOp) {
457- return !canBeCanonicalizedForTranspose (createNdOp.getType ());
457+ return !canBeOptimizedForTranspose (createNdOp.getType ());
458458 });
459459 target.addDynamicallyLegalOp <xegpu::LoadNdOp>(
460460 [&](xegpu::LoadNdOp loadNdOp) {
461- return !canBeCanonicalizedForTranspose (loadNdOp.getTensorDescType ());
461+ return !canBeOptimizedForTranspose (loadNdOp.getTensorDescType ());
462462 });
463463 // Vector ExtractOps can have optimizable layouts if they extract from
464464 // LoadNdOps with array length greater than 1. These ExtractOps must be
@@ -470,7 +470,7 @@ struct XeGPUOptimizeBlockLoadsPass final
470470 return true ;
471471 auto laneLayout = layout.getEffectiveLaneLayoutAsInt ();
472472 auto laneData = layout.getEffectiveLaneDataAsInt ();
473- return !canBeCanonicalizedForTranspose (laneLayout, laneData);
473+ return !canBeOptimizedForTranspose (laneLayout, laneData);
474474 });
475475 converter.addConversion ([](Type type) { return type; });
476476
0 commit comments