Commit b6f6b41

Revert "[Backend] Convert FMA dot operand to linear layout (#5469)"
This reverts commit d9facf3.
1 parent ae7a689 · commit b6f6b41

6 files changed, +94 −225 lines

lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandFMA.cpp

Lines changed: 4 additions & 14 deletions
@@ -98,13 +98,13 @@ void storeValuesInLinearVector(PatternRewriter &rewriter, Location loc,
 }
 }

-bool verifyCTALayout(CTALayoutAttr ctaLayout) {
+void verifyCTALayout(CTALayoutAttr ctaLayout) {
   auto ctaSplit = ctaLayout.getCTASplitNum();
   for (auto split : ctaSplit) {
     if (split != 1)
-      return false;
+      llvm::report_fatal_error("tensors splited in CGA(thread group clusters) "
+                               "are not supported in FMA dot yet.");
   }
-  return true;
 }

 /// Get a linear offset of first element loaded by thread.

@@ -216,8 +216,7 @@ Value loadFMAOp(Value srcVal, Value llVal, BlockedEncodingAttr dLayout,
                 Value thread, Location loc,
                 const LLVMTypeConverter *typeConverter,
                 ConversionPatternRewriter &rewriter, const int dotOpNo) {
-  if (!verifyCTALayout(dLayout.getCTALayout()))
-    return Value();
+  verifyCTALayout(dLayout.getCTALayout());

   DimIdx dim;
   dim.batch = 0;

@@ -293,15 +292,6 @@ Value loadFMAOp(Value srcVal, Value llVal, BlockedEncodingAttr dLayout,
   auto numBTiles = std::max(1u, B / shapePerCTABTile);
   auto numNonKTiles = std::max(1u, NonK / shapePerCTANonKTile);

-  // Found discrepancy in this case,
-  // use linear layout based converter for this case
-  // TODO: break batch and non-k dimension iterations in
-  // "repeat" and "inside-repeate" parts, pack them in llvm structure
-  // according repeat and register order.
-  // See FMA.cpp:getValueTableFromStructFMA for reference
-  if (numBTiles != 1 || numNonKTiles != 1)
-    return Value();
-
   auto perThreadShape =
       getElemsPerThreadInOp(opTensorShape, shapePerCTATile, sizePerThread);

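Note on the verifyCTALayout change above: with the linear-layout converter available, the helper returned false and loadFMAOp bailed out with an empty Value so another converter could handle the case; the restored pre-#5469 code treats a CTA split as unsupported outright and aborts compilation. A minimal standalone sketch of the two contracts, with plain stand-ins for the Triton/LLVM types (reportFatalError is only a stub for llvm::report_fatal_error):

#include <cstdio>
#include <cstdlib>
#include <vector>

// Stand-in for llvm::report_fatal_error: print and abort.
[[noreturn]] static void reportFatalError(const char *msg) {
  std::fprintf(stderr, "fatal error: %s\n", msg);
  std::abort();
}

// Restored contract: any CTA split != 1 is a hard error, so callers
// may assume success whenever the function returns.
static void verifyCTALayout(const std::vector<unsigned> &ctaSplit) {
  for (unsigned split : ctaSplit)
    if (split != 1)
      reportFatalError("tensors split in CGA are not supported in FMA dot yet");
}

// Reverted (linear-layout era) contract: report failure and let the
// caller fall back to the linear-layout based converter instead.
static bool verifyCTALayoutOrFail(const std::vector<unsigned> &ctaSplit) {
  for (unsigned split : ctaSplit)
    if (split != 1)
      return false;
  return true;
}

int main() {
  verifyCTALayout({1, 1});                            // returns normally
  std::printf("%d\n", verifyCTALayoutOrFail({2, 1})); // prints 0: caller falls back
}

The practical consequence of the revert is that a split CTA layout becomes a hard failure in the FMA path rather than a signal to try a different lowering.
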
lib/Conversion/TritonGPUToLLVM/DotOpToLLVM/FMA.cpp

Lines changed: 30 additions & 72 deletions
@@ -13,51 +13,24 @@ using ::mlir::triton::gpu::expandMatrixShapeWithBatch;
 using ::mlir::triton::gpu::getShapePerCTA;
 using ::mlir::triton::gpu::getSizePerThread;

-/// \brief spatial position of repetition and register of a given value
-struct OperandValueKey {
-  unsigned bRepIdx, nonKRepIdx;
-  unsigned bIdx, nonKIdx, kIdx;
-
-  bool operator==(const OperandValueKey &other) const {
-    return (bRepIdx == other.bRepIdx && nonKRepIdx == other.nonKRepIdx &&
-            bIdx == other.bIdx && nonKIdx == other.nonKIdx &&
-            kIdx == other.kIdx);
-  }
-};
-
-template <> struct std::hash<OperandValueKey> {
-  std::size_t operator()(const OperandValueKey &k) const {
-    return llvm::hash_combine(k.bRepIdx, k.nonKRepIdx, k.bIdx, k.nonKIdx,
-                              k.kIdx);
-  }
-};
-
-using ValueTableFMA = std::unordered_map<OperandValueKey, Value>;
+using ValueTableFMA = std::map<std::tuple<int, int, int>, Value>;

-static ValueTableFMA getValueTableFromStructFMA(
-    Value val, ArrayRef<unsigned> perRepShape, ArrayRef<unsigned> repetitions,
-    unsigned kDim, unsigned nonKDim, ConversionPatternRewriter &rewriter,
-    Location loc, ArrayRef<unsigned> inRepOrder, ArrayRef<unsigned> repOrder) {
+static ValueTableFMA
+getValueTableFromStructFMA(Value val, ArrayRef<unsigned> perTileShape,
+                           unsigned kDim, unsigned nonKDim,
+                           ConversionPatternRewriter &rewriter, Location loc,
+                           ArrayRef<unsigned> order) {
   ValueTableFMA res;
   auto elems = unpackLLElements(loc, val, rewriter);
-  assert(perRepShape.size() == 3);
-  auto numElemsRep = product(perRepShape);
-  assert(elems.size() == numElemsRep * product(repetitions));
+  assert(perTileShape.size() == 3);
+  assert(elems.size() == product(perTileShape));
   assert(kDim == 1 || kDim == 2);
   assert(nonKDim == 1 || nonKDim == 2);
   const unsigned bDim = 0;

   for (unsigned idx = 0; idx < elems.size(); ++idx) {
-    auto inRepLinearIdx = idx % numElemsRep;
-    auto repLinearIdx = idx / numElemsRep;
-    auto inRepSpatialIdx =
-        mlir::LLVM::delinearize(inRepLinearIdx, perRepShape, inRepOrder);
-    auto repSpatialIdx =
-        mlir::LLVM::delinearize(repLinearIdx, repetitions, repOrder);
-    OperandValueKey key{repSpatialIdx[0], repSpatialIdx[nonKDim],
-                        inRepSpatialIdx[0], inRepSpatialIdx[nonKDim],
-                        inRepSpatialIdx[kDim]};
-    res[key] = elems[idx];
+    auto spatialIdx = mlir::LLVM::delinearize(idx, perTileShape, order);
+    res[{spatialIdx[bDim], spatialIdx[nonKDim], spatialIdx[kDim]}] = elems[idx];
   }
   return res;
 }

@@ -81,61 +54,46 @@ LogicalResult convertFMADot(triton::DotOp op, triton::DotOp::Adaptor adaptor,

   BlockedEncodingAttr dLayout =
       cast<BlockedEncodingAttr>(dTensorTy.getEncoding());
-  // TODO process A and B operand separately
-  auto inRepOrder = expandMatrixOrderWithBatch(dLayout.getOrder());
-  auto repOrder = expandMatrixOrderWithBatch(dLayout.getRepOrder());
+  auto order = expandMatrixOrderWithBatch(dLayout.getOrder());
   auto cc = unpackLLElements(loc, adaptor.getC(), rewriter);

   Value llA = adaptor.getA();
   Value llB = adaptor.getB();

   auto sizePerThread =
       expandMatrixShapeWithBatch(ArrayRef(getSizePerThread(dLayout)));
-  auto numElemsPerThread = product(sizePerThread);
   auto shapePerCTATile =
       expandMatrixShapeWithBatch(ArrayRef(getShapePerCTATile(dLayout)));

   unsigned K = aShapePerCTA[2];

-  unsigned threadTileShape[3];
-  unsigned repetitions[3];
+  unsigned perThreadShape[3];
   for (int i = 0; i < 3; ++i) {
-    repetitions[i] =
-        ceil(dShapePerCTA[i], static_cast<int64_t>(shapePerCTATile[i]));
+    unsigned numRep = dShapePerCTA[i] / shapePerCTATile[i];
+    numRep = std::max(static_cast<unsigned>(1), numRep);
+    perThreadShape[i] = numRep * sizePerThread[i];
   }

   auto has = getValueTableFromStructFMA(
-      llA, {sizePerThread[0], sizePerThread[1], K},
-      {repetitions[0], repetitions[1], 1},
-      /*kDim*/ 2, /*nonKDim*/ 1, rewriter, loc, inRepOrder, repOrder);
+      llA, {perThreadShape[0], perThreadShape[1], K},
+      /*kDim*/ 2, /*nonKDim*/ 1, rewriter, loc, order);
   auto hbs = getValueTableFromStructFMA(
-      llB, {sizePerThread[0], K, sizePerThread[2]},
-      {repetitions[0], 1, repetitions[2]},
-      /*kDim*/ 1, /*nonKDim*/ 2, rewriter, loc, inRepOrder, repOrder);
+      llB, {perThreadShape[0], K, perThreadShape[2]},
+      /*kDim*/ 1, /*nonKDim*/ 2, rewriter, loc, order);

   SmallVector<Value> acc = cc;

-  for (unsigned bRep = 0; bRep < repetitions[0]; ++bRep)
-    for (unsigned mRep = 0; mRep < repetitions[1]; ++mRep)
-      for (unsigned nRep = 0; nRep < repetitions[2]; ++nRep)
-        for (unsigned b = 0; b < sizePerThread[0]; ++b)
-          for (unsigned m = 0; m < sizePerThread[1]; ++m)
-            for (unsigned n = 0; n < sizePerThread[2]; ++n) {
-              SmallVector<unsigned> multiDimAccumIdx = {b, m, n};
-              unsigned linearInRepIdx =
-                  linearize(multiDimAccumIdx, sizePerThread, inRepOrder);
-              SmallVector<unsigned> multiDimRepIdx = {bRep, mRep, nRep};
-              unsigned linearRepIdx =
-                  linearize(multiDimRepIdx, repetitions, repOrder);
-              unsigned linearAccumIdx =
-                  linearInRepIdx + linearRepIdx * numElemsPerThread;
-              for (unsigned k = 0; k < K; ++k) {
-                auto aOp = has[{bRep, mRep, b, m, k}];
-                auto bOp = hbs[{bRep, nRep, b, n, k}];
-                acc[linearAccumIdx] = rewriter.create<LLVM::FMulAddOp>(
-                    loc, aOp, bOp, acc[linearAccumIdx]);
-              }
-            }
+  for (unsigned b = 0; b < perThreadShape[0]; ++b)
+    for (unsigned m = 0; m < perThreadShape[1]; ++m)
+      for (unsigned n = 0; n < perThreadShape[2]; ++n) {
+        SmallVector<unsigned> multiDimAccumIdx = {b, m, n};
+        unsigned linearAccumIdx =
+            linearize(multiDimAccumIdx, perThreadShape, order);
+        for (unsigned k = 0; k < K; ++k) {
+          acc[linearAccumIdx] = rewriter.create<LLVM::FMulAddOp>(
+              loc, has[{b, m, k}], hbs[{b, n, k}], acc[linearAccumIdx]);
+        }
+      }

   auto res = packLLElements(loc, typeConverter, acc, rewriter, dTensorTy);
   rewriter.replaceOp(op, res);

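Both the value-table construction and the accumulator addressing above rest on order-aware index (de)linearization: a flat register index is decoded into (batch, nonK, k) coordinates following the layout order, and (b, m, n) is re-encoded the same way to address acc. A self-contained sketch of that round trip; these helpers mirror the semantics assumed here of mlir::LLVM::delinearize and the linearize call above (order[0] taken as the fastest-varying dimension), not the actual MLIR utilities:

#include <cassert>
#include <cstdio>
#include <vector>

using std::vector;

// Decode a linear index into multi-dim coordinates; order[0] is the
// fastest-varying dimension.
static vector<unsigned> delinearize(unsigned linear,
                                    const vector<unsigned> &shape,
                                    const vector<unsigned> &order) {
  vector<unsigned> coords(shape.size());
  for (unsigned d : order) {
    coords[d] = linear % shape[d];
    linear /= shape[d];
  }
  return coords;
}

// Inverse operation, as used for linearAccumIdx in convertFMADot.
static unsigned linearize(const vector<unsigned> &coords,
                          const vector<unsigned> &shape,
                          const vector<unsigned> &order) {
  unsigned linear = 0;
  for (auto it = order.rbegin(); it != order.rend(); ++it)
    linear = linear * shape[*it] + coords[*it];
  return linear;
}

int main() {
  // Example: perThreadShape = {batch, m, k} = {1, 4, 2}, order = {2, 1, 0}.
  vector<unsigned> shape{1, 4, 2}, order{2, 1, 0};
  for (unsigned idx = 0; idx < 1 * 4 * 2; ++idx) {
    auto c = delinearize(idx, shape, order);
    assert(linearize(c, shape, order) == idx); // round trip holds
    std::printf("idx %u -> (b=%u, m=%u, k=%u)\n", idx, c[0], c[1], c[2]);
  }
}

Because the restored converter folds the repetition count into perThreadShape, a single (de)linearization step suffices, whereas the reverted code split each index into an in-repetition part and a repetition part.
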
lib/Conversion/TritonGPUToLLVM/MemoryOpToLLVM.cpp

Lines changed: 42 additions & 1 deletion
@@ -119,13 +119,54 @@ struct LocalLoadOpConversion : public ConvertOpToLLVMPattern<LocalLoadOp> {
       : ConvertOpToLLVMPattern(typeConverter, benefit), targetInfo(targetInfo) {
   }

+  // FIXME [Dot LL]
+  // Do for all DotOperandEncodingAttr once we have LLs for all of them
+  static bool isSupportedLayout(Attribute dstLayout) {
+    if (isa<BlockedEncodingAttr, MmaEncodingTrait, SliceEncodingAttr,
+            LinearEncodingAttr>(dstLayout))
+      return true;
+    if (auto dot = dyn_cast<DotOperandEncodingAttr>(dstLayout)) {
+      if (isa<MmaEncodingTrait>(dot.getParent()))
+        return true;
+    }
+    return false;
+  };
+
   LogicalResult
   matchAndRewrite(LocalLoadOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    return lowerSharedToDistributed(op, adaptor, getTypeConverter(), rewriter);
+    RankedTensorType dstTy = op.getType();
+    Attribute dstLayout = dstTy.getEncoding();
+    if (isSupportedLayout(dstLayout)) {
+      return lowerSharedToDistributed(op, adaptor, getTypeConverter(),
+                                      rewriter);
+    }
+    if (isa<DotOperandEncodingAttr>(dstLayout) &&
+        isa<BlockedEncodingAttr>(
+            cast<DotOperandEncodingAttr>(dstLayout).getParent())) {
+      return lowerSharedToDotOpFMA(op, adaptor, getTypeConverter(), rewriter);
+    }
+    return failure();
   }

 private:
+  LogicalResult
+  lowerSharedToDotOpFMA(LocalLoadOp op, LocalLoadOpAdaptor adaptor,
+                        const LLVMTypeConverter *typeConverter,
+                        ConversionPatternRewriter &rewriter) const {
+    auto loc = op.getLoc();
+    RankedTensorType dstTy = op.getType();
+    Attribute dstLayout = dstTy.getEncoding();
+    auto dotLayout = cast<DotOperandEncodingAttr>(dstLayout);
+    auto blockedLayout = cast<BlockedEncodingAttr>(
+        cast<DotOperandEncodingAttr>(dstLayout).getParent());
+    auto thread = getThreadId(rewriter, loc);
+    Value res = SharedToDotOperandFMA::convertLayout(
+        dotLayout.getOpIdx(), op.getSrc(), adaptor.getSrc(), blockedLayout,
+        thread, loc, getTypeConverter(), rewriter);
+    rewriter.replaceOp(op, res);
+    return success();
+  }
   LogicalResult
   lowerSharedToDistributed(LocalLoadOp op, LocalLoadOpAdaptor adaptor,
                            const LLVMTypeConverter *typeConverter,

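The routing restored in matchAndRewrite is a two-step dispatch on the destination encoding: encodings with a working linear-layout lowering go to lowerSharedToDistributed, and a dot-operand encoding whose parent is a blocked encoding falls through to the dedicated FMA converter. A standalone sketch of that control flow, with the encodings reduced to plain data (the types and names here are illustrative, not Triton's API):

#include <cstdio>
#include <optional>

// Illustrative stand-ins for the encoding attributes checked in
// LocalLoadOpConversion::matchAndRewrite.
enum class Parent { None, Mma, Blocked };
struct Encoding {
  bool isBlockedOrMmaOrSliceOrLinear; // BlockedEncodingAttr, MmaEncodingTrait, ...
  std::optional<Parent> dotParent;    // set iff DotOperandEncodingAttr
};

enum class Lowering { SharedToDistributed, SharedToDotOpFMA, Failure };

// Mirrors the restored routing: linear-layout-capable encodings first,
// then the FMA special case for dot operands with a blocked parent.
static Lowering route(const Encoding &dst) {
  if (dst.isBlockedOrMmaOrSliceOrLinear ||
      (dst.dotParent && *dst.dotParent == Parent::Mma))
    return Lowering::SharedToDistributed;
  if (dst.dotParent && *dst.dotParent == Parent::Blocked)
    return Lowering::SharedToDotOpFMA;
  return Lowering::Failure;
}

int main() {
  Encoding fmaOperand{false, Parent::Blocked};
  Encoding mmaOperand{false, Parent::Mma};
  std::printf("%d %d\n",
              static_cast<int>(route(fmaOperand)),  // 1: FMA path
              static_cast<int>(route(mmaOperand))); // 0: distributed path
}
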
lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp

Lines changed: 16 additions & 63 deletions
@@ -240,11 +240,8 @@ LinearLayout sharedToLinearLayoutLeadingOffset(ArrayRef<int64_t> shape,
   return combineCtaCgaWithShape(tileLayout, shared.getCTALayout(), shape);
 }

-/// Function to generate lane and warp layout for dot operands.
-LinearLayout broadcastedDotOperandLayout(MLIRContext *ctx,
-                                         ArrayRef<unsigned> shape,
-                                         ArrayRef<unsigned> order,
-                                         unsigned kDim, StringAttr inDimName) {
+LinearLayout warpsDotOperand(MLIRContext *ctx, ArrayRef<unsigned> warpShape,
+                             ArrayRef<unsigned> warpOrder, unsigned inner) {
   // Let warpsPerCTAMma = {2, 2}, then
   // warpsPerCTA = {2, 1} for opA and warpsPerCTA = {1, 2} for opB
   // assume warpOrder = {1, 0}

@@ -259,23 +256,24 @@ LinearLayout broadcastedDotOperandLayout(MLIRContext *ctx,
   //    - - | - -       - - | - -
   //    2 3 | 2 3       0 2 | 1 3
   // In other words, we need to broadcast along K
-  auto rank = shape.size();
+  auto rank = warpShape.size();
   auto dimNames = standardOutDimNames(ctx, rank);
-  LinearLayout layout = LinearLayout::empty();
+  LinearLayout warpLayout = LinearLayout::empty();

   // We have to broadcast along the inner dimension
   // For A, when moving along M we go from 0 to 2.
   // For B, when moving along N we go from 0 to 1.
   // As such, choosing the order of A {1, 0}, gives us the correct broadcasting
   // Same happens if the warpOrder is {0, 1}, like in Hopper
-  for (auto d : order) {
-    if (d == kDim) {
-      layout *= LinearLayout::zeros1D(shape[d], inDimName, dimNames[d]);
+  for (auto d : warpOrder) {
+    if (d == inner) {
+      warpLayout *= LinearLayout::zeros1D(warpShape[d], S("warp"), dimNames[d]);
     } else {
-      layout *= LinearLayout::identity1D(shape[d], inDimName, dimNames[d]);
+      warpLayout *=
+          LinearLayout::identity1D(warpShape[d], S("warp"), dimNames[d]);
     }
   }
-  return layout;
+  return warpLayout;
 }

 } // anonymous namespace

@@ -623,8 +621,7 @@ wmmaDotOperandToLinearLayout(DotOperandEncodingAttr dotWmmaLayout,
   // Generate warp layout
   auto warpsPerCTA = wmmaLayout.getWarpsPerCTA();
   auto warpOrder = triton::gpu::getWarpOrder(dotWmmaLayout);
-  LinearLayout warpLayout =
-      broadcastedDotOperandLayout(ctx, warpsPerCTA, warpOrder, kDim, S("warp"));
+  LinearLayout warpLayout = warpsDotOperand(ctx, warpsPerCTA, warpOrder, kDim);

   // reorder dim names in rep order, so combineCtaCgaWithShape generate proper
   // extension of layout

@@ -654,48 +651,6 @@ BlockedEncodingAttr::toLinearLayout(ArrayRef<int64_t> shape) const {
   return combineCtaCgaWithShape(ctaLayout, getCTALayout(), shape);
 }

-std::optional<LinearLayout>
-fmaDotToLinearLayout(DotOperandEncodingAttr operandLayout,
-                     ArrayRef<int64_t> shape) {
-  int rank = shape.size();
-  auto blocked = cast<BlockedEncodingAttr>(operandLayout.getParent());
-  MLIRContext *ctx = operandLayout.getContext();
-
-  // TODO: introduce registerOrder or use getOrder(operandLayout)
-  // Currently this order is used in legacy converter, because we do not
-  // have access to full dot operand layout, only parent part.
-  auto regOrder = blocked.getOrder();
-  // TODO: use operandLayout.getThreadOrder()
-  auto threadOrder = blocked.getThreadOrder();
-  auto warpOrder = blocked.getWarpOrder();
-  auto repOrder = blocked.getRepOrder();
-
-  StringAttr kReg = S("register");
-  StringAttr kLane = S("lane");
-  StringAttr kWarp = S("warp");
-
-  SmallVector<unsigned> threadSize = blocked.getSizePerThread();
-  auto kDimIdx = operandLayout.getOpIdx() == 0 ? rank - 1 : rank - 2;
-  threadSize[kDimIdx] = shape[kDimIdx];
-  auto threadShape = blocked.getThreadsPerWarp();
-  auto warpShape = blocked.getWarpsPerCTA();
-
-  SmallVector<StringAttr> repDimNames =
-      permuteDimNames(standardOutDimNames(ctx, rank), repOrder);
-
-  auto registersLayout = identityStandardND(kReg, threadSize, regOrder);
-  auto lanesLayout = broadcastedDotOperandLayout(ctx, threadShape, threadOrder,
-                                                 kDimIdx, kLane);
-  auto warpsLayout =
-      broadcastedDotOperandLayout(ctx, warpShape, warpOrder, kDimIdx, kWarp);
-
-  LinearLayout ctaLayout = registersLayout.transposeOuts(repDimNames) *
-                           lanesLayout.transposeOuts(repDimNames) *
-                           warpsLayout.transposeOuts(repDimNames);
-
-  return combineCtaCgaWithShape(ctaLayout, getCTALayout(operandLayout), shape);
-}
-
 LinearLayout nvidiaMmaTile(MLIRContext *ctx, ArrayRef<unsigned> tileShape,
                            unsigned kWidth, ArrayRef<unsigned> order,
                            ArrayRef<unsigned> repOrder) {

@@ -786,21 +741,19 @@ LinearLayout nvidiaDotToLinearLayout(ArrayRef<int64_t> shape,
   auto ctaLayout =
       nvidiaMmaTile(ctx, tileShape, kWidth, getOrder(dot), dot.getRepOrder());
   auto kDim = isA ? rank - 1 : rank - 2;
-  ctaLayout *= broadcastedDotOperandLayout(ctx, mma.getWarpsPerCTA(),
-                                           mma.getWarpOrder(), kDim, S("warp"))
-                   .transposeOuts(llvm::to_vector(ctaLayout.getOutDimNames()));
+  ctaLayout *=
+      warpsDotOperand(ctx, mma.getWarpsPerCTA(), mma.getWarpOrder(), kDim)
+          .transposeOuts(llvm::to_vector(ctaLayout.getOutDimNames()));

   return combineCtaCgaWithShape(ctaLayout, getCTALayout(dot), shape);
 }

 std::optional<LinearLayout>
 DotOperandEncodingAttr::toLinearLayout(ArrayRef<int64_t> shape) const {
   auto parent = getParent();
-  if (auto blockedLayout = mlir::dyn_cast<BlockedEncodingAttr>(parent)) {
-    return fmaDotToLinearLayout(*this, shape);
-  } else if (auto mfmaLayout = mlir::dyn_cast<AMDMfmaEncodingAttr>(parent)) {
+  if (auto mfmaLayout = llvm::dyn_cast<AMDMfmaEncodingAttr>(parent)) {
     return mfmaDotToLinearLayout(*this, shape);
-  } else if (auto wmmaLayout = mlir::dyn_cast<AMDWmmaEncodingAttr>(parent)) {
+  } else if (auto wmmaLayout = llvm::dyn_cast<AMDWmmaEncodingAttr>(parent)) {
     return wmmaDotOperandToLinearLayout(*this, shape);
   } else if (auto mma = mlir::dyn_cast<NvidiaMmaEncodingAttr>(parent)) {
     return nvidiaDotToLinearLayout(shape, *this);

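For power-of-two warp counts, the warpsDotOperand construction can be read bit by bit: each dimension in warpOrder consumes log2(warpShape[d]) bits of the warp id, mapped through identity1D for non-K dimensions and through zeros1D (all zeros, i.e. broadcast) for the K dimension. The sketch below simulates that mapping and reproduces the A/B warp tilings from the comment for warpsPerCTA = {2, 2}, warpOrder = {1, 0}; it models the layout's effect only and does not use the LinearLayout API:

#include <array>
#include <cstdio>
#include <vector>

// Map a warp id to spatial coordinates: consume the id's bits per
// dimension in `order`; a dimension equal to `kDim` contributes zeros
// (broadcast, zeros1D), the others contribute the consumed bits
// unchanged (identity1D).
static std::array<unsigned, 2> warpCoord(unsigned warp,
                                         const std::vector<unsigned> &shape,
                                         const std::vector<unsigned> &order,
                                         unsigned kDim) {
  std::array<unsigned, 2> coord{0, 0};
  for (unsigned d : order) {
    unsigned bits = warp % shape[d]; // bits of the id assigned to dim d
    warp /= shape[d];
    coord[d] = (d == kDim) ? 0 : bits; // zeros1D vs identity1D
  }
  return coord;
}

int main() {
  std::vector<unsigned> warpsPerCTA{2, 2}, warpOrder{1, 0};
  // opA: K is dim 1, so warps 0 and 1 coincide (broadcast along K);
  // opB: K is dim 0, so warps 0 and 2 coincide.
  for (unsigned kDim : {1u, 0u}) {
    std::printf("kDim = %u:", kDim);
    for (unsigned w = 0; w < 4; ++w) {
      auto c = warpCoord(w, warpsPerCTA, warpOrder, kDim);
      std::printf("  w%u->(%u,%u)", w, c[0], c[1]);
    }
    std::printf("\n");
  }
}

Running this prints w0 and w1 at the same coordinate for kDim = 1, and w0/w2 (and w1/w3) at the same coordinate for kDim = 0, matching the "A: 0 1 | 0 1" and "B: 0 2 | 1 3" diagrams in the source comment.
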
test/Conversion/amd/decompose-unsupported-conversions.mlir

Lines changed: 2 additions & 2 deletions
@@ -97,11 +97,11 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
 #blocked = #ttg.blocked<{sizePerThread = [1, 32], threadsPerWarp = [32, 2], warpsPerCTA = [2, 2], order = [1, 0]}>
 #blocked1 = #ttg.blocked<{sizePerThread = [1, 32], threadsPerWarp = [32, 2], warpsPerCTA = [4, 1], order = [1, 0]}>
 module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.target = "hip:gfx940", "ttg.threads-per-warp" = 64 : i32} {
-  tt.func @neg_blocked_to_dot_op_incompatible_warp_gfx940(%arg0: tensor<128x128xf16, #blocked>) {
+  tt.func @neg_blocked_to_dot_op_incompatible_warp_gfx940(%arg0: tensor<32x32xf16, #blocked>) {
     // CHECK-NOT: ttg.convert_layout
     // CHECK: ttg.local_alloc
     // CHECK: ttg.local_load
-    %0 = ttg.convert_layout %arg0 : tensor<128x128xf16, #blocked> -> tensor<128x128xf16, #ttg.dot_op<{opIdx = 0, parent = #blocked1}>>
+    %0 = ttg.convert_layout %arg0 : tensor<32x32xf16, #blocked> -> tensor<32x32xf16, #ttg.dot_op<{opIdx = 0, parent = #blocked1}>>
     tt.return
   }
 }
