Skip to content

Commit 8491a77

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents 9774b1c + a1f7591 commit 8491a77

File tree

16 files changed

+2277
-174
lines changed

16 files changed

+2277
-174
lines changed

include/imex/Conversion/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ add_mlir_doc(Passes IMEXConversionPasses ./ -gen-pass-doc)
88
add_subdirectory(DistToStandard)
99
add_subdirectory(DropRegions)
1010
add_subdirectory(XeTileToXeGPU)
11+
add_subdirectory(XeGPUToVC)

include/imex/Conversion/Passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <imex/Conversion/GPUXToLLVM/GPUXToLLVMPass.h>
2525
#include <imex/Conversion/NDArrayToLinalg/NDArrayToLinalg.h>
2626
#include <imex/Conversion/XeGPUToSPIRV/XeGPUToSPIRV.h>
27+
#include <imex/Conversion/XeGPUToVC/XeGPUToVC.h>
2728
#include <imex/Conversion/XeTileToXeGPU/XeTileToXeGPU.h>
2829

2930
namespace imex {

include/imex/Conversion/Passes.td

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,4 +410,36 @@ def ConvertXeTileToXeGPU: Pass<"convert-xetile-to-xegpu", "::mlir::gpu::GPUModul
410410
];
411411
}
412412

413+
414+
//===----------------------------------------------------------------------===//
415+
// XeGPUToVC
416+
//===----------------------------------------------------------------------===//
417+
418+
// Pass definition for `convert-xegpu-to-vc`. It is anchored on
// gpu::GPUModuleOp and constructed through
// imex::createConvertXeGPUToVCPass().
def ConvertXeGPUToVC : Pass<"convert-xegpu-to-vc", "::mlir::gpu::GPUModuleOp"> {
  let summary = "Generate vc-intrinsics functions for xegpu dialect operations";
  let description = [{
    Convert XeGPU dialect operations into the Func dialect calls to vc-intrinsics
    functions.
  }];
  let options = [
    Option<"enableJointMatrix", "enable-joint-matrix","bool", "false",
           "Enable XeGPU SIMT mode Ops lowered to JointMatrix based Ops">,
    // The help text below used to be a verbatim copy of the JointMatrix
    // description above; it now describes the option it belongs to.
    Option<"enableGenISAIntrinsic", "enable-genisa-intrinsic","bool", "false",
           "Enable XeGPU Ops lowered to intel GenISA Intrinsics">,
    Option<"enableVCIntrinsic", "enable-vc-intrinsic","bool", "true",
           "Enable XeGPU Ops lowered to intel vc Intrinsics">,
    // NOTE(review): this flag is spelled "useRawSend" rather than kebab-case
    // like its siblings. It is kept as-is because renaming a command-line
    // flag would break existing pipelines.
    Option<"useRawSend", "useRawSend","bool", "true",
           "Prefer Raw Send API for load/store">
  ];

  let dependentDialects = ["::imex::xegpu::XeGPUDialect",
                           "::mlir::vector::VectorDialect",
                           "::mlir::memref::MemRefDialect",
                           "::mlir::LLVM::LLVMDialect",
                           "::mlir::func::FuncDialect",
                           "::mlir::arith::ArithDialect",
                           ];
  let constructor = "imex::createConvertXeGPUToVCPass()";
}
444+
413445
#endif // _IMEX_CONVERSION_PASSES_TD_INCLUDED_

include/imex/Conversion/XeGPUToVC/CMakeLists.txt

Whitespace-only changes.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===- XeGPUToVC.h - Conversion---------------*- C++ -*-===//
2+
//
3+
// Copyright 2024 Intel Corporation
4+
// Part of the IMEX Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file declares the pass that converts XeGPU dialect operations into Func
12+
/// dialect calls to vc-intrinsics functions
13+
///
14+
//===----------------------------------------------------------------------===//
15+
#ifndef IMEX_CONVERSION_XEGPUTOVC_H
16+
#define IMEX_CONVERSION_XEGPUTOVC_H
17+
#include <mlir/Dialect/Vector/IR/VectorOps.h>
18+
19+
#include "imex/Dialect/XeGPU/IR/XeGPU.h"
20+
#include "imex/Dialect/XeTile/IR/XeTileOps.h"
21+
#include "imex/Utils/XeCommon.h"
22+
23+
// Forward declarations of MLIR types. Of these, only OperationPass and
// gpu::GPUModuleOp are referenced by the declaration at the end of this
// header; the remaining names are not used in the visible part of the file.
namespace mlir {
24+
25+
class ConversionTarget;
26+
class LLVMTypeConverter;
27+
class Pass;
28+
class Operation;
29+
class RewritePatternSet;
30+
template <typename T> class OperationPass;
31+
32+
namespace gpu {
33+
class GPUModuleOp;
34+
} // namespace gpu
35+
36+
} // namespace mlir
37+
38+
namespace imex {
39+
40+
/// Factory for the XeGPU-to-VC conversion pass.
///
/// \returns an owning pointer to a pass that operates on
/// `mlir::gpu::GPUModuleOp`.
std::unique_ptr<::mlir::OperationPass<::mlir::gpu::GPUModuleOp>>
41+
createConvertXeGPUToVCPass();
42+
43+
} // namespace imex
44+
#endif

include/imex/Utils/XeCommon.h

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@
1616
#ifndef _IMEX_XECOMMON_H_
1717
#define _IMEX_XECOMMON_H_
1818

19+
#include "imex/Dialect/XeGPU/IR/XeGPU.h"
20+
#include "imex/Dialect/XeTile/IR/XeTileOps.h"
1921
#include <mlir/Dialect/GPU/IR/GPUDialect.h>
2022
#include <mlir/Dialect/SCF/IR/SCF.h>
2123
#include <mlir/IR/BuiltinOps.h>
2224
#include <mlir/IR/Value.h>
2325
#include <mlir/Transforms/DialectConversion.h>
2426
#include <mlir/Transforms/OneToNTypeConversion.h>
25-
27+
using namespace imex::xegpu;
2628
namespace imex {
2729

2830
// It checks each GPUFuncOp in the module to see
@@ -85,7 +87,7 @@ class TileUsageAnalysis {
8587
else if (idx == 2)
8688
Usage[op] |= (uint)UsageType::DPAS_C;
8789
else
88-
llvm::dbgs() << "unknown usage: " << idx;
90+
op->emitOpError() << "unknown usage: " << idx;
8991
}
9092

9193
if (auto unpack =
@@ -269,6 +271,108 @@ class PropagateAnalysis {
269271
}
270272
};
271273

274+
// Declaration only; the definition is not part of this header.
// Returns a (string, vector type) pair derived from `type`.
// `use64bitData` and `enforceInteger` both default to false.
// NOTE(review): presumably the string is a type suffix for intrinsic names and
// the flags select 64-bit / integer element encodings - confirm against the
// definition.
std::pair<std::string, mlir::VectorType>
275+
encodeVectorType(mlir::ConversionPatternRewriter &rewriter,
276+
mlir::VectorType type, bool use64bitData = false,
277+
bool enforceInteger = false);
278+
279+
// Declaration only. Maps an MLIR type to an unsigned code.
// NOTE(review): "Dataum" looks like a misspelling of "Datum"; the name is left
// unchanged here because callers depend on it.
unsigned encodeDataum(mlir::Type type);
280+
281+
// Declaration only. Maps an atomic read-modify-write kind to an unsigned code.
// NOTE(review): the numbering scheme is defined by the implementation, which
// is not visible in this header - confirm there.
unsigned encodeOpcode(AtomicRMWKind kind);
282+
283+
// L1 and L3 cache-policy encodings for load operations.
284+
// L1 cache policies: Uncached (UC), Cached (C), Streaming (S),
285+
// Invalidate-After-Read (IAR). L3 cache policies: Uncached (UC), Cached (C).
286+
#define L1UC_L3UC 1
287+
#define L1UC_L3C 2
288+
#define L1C_L3UC 3
289+
#define L1C_L3C 4
290+
#define L1S_L3UC 5
291+
#define L1S_L3C 6
292+
#define L1IAR_L3C 7
293+
294+
// L1 and L3 cache-policy encodings for store operations.
295+
// L1 cache policies: Uncached (UC), Write-Through (WT), Write-Back (WB),
296+
// Streaming (S). L3 cache policies: Uncached (UC), Write-Back (WB).
// This table has no entry of its own for value 1: encodeCacheHint uses
// L1UC_L3UC from the load table for stores as well.
297+
#define L1UC_L3WB 2
298+
#define L1WT_L3UC 3
299+
#define L1WT_L3WB 4
300+
// L1S_L3UC repeats the load-table definition with the same value (5).
#define L1S_L3UC 5
301+
#define L1S_L3WB 6
302+
#define L1WB_L3WB 7
303+
304+
template <typename OpType> unsigned encodeCacheHint(OpType op) {
305+
auto l1hint = op.getL1Hint();
306+
auto l3hint = op.getL3Hint();
307+
308+
constexpr bool isStore = std::is_same_v<OpType, StoreNDOp> ||
309+
std::is_same_v<OpType, StoreScatterOp>;
310+
unsigned cacheHint = L1UC_L3UC;
311+
312+
#define SET_CACHEVALUE(hint, cacheHintVal) \
313+
hint.has_value() ? hint.value() : cacheHintVal
314+
315+
if constexpr (!isStore) {
316+
317+
auto l1CacheValue = SET_CACHEVALUE(l1hint, CacheReadHint::UNCACHED);
318+
auto l3CacheValue = SET_CACHEVALUE(l3hint, CacheReadHint::UNCACHED);
319+
320+
// Setting Cache policy override based on L3 Uncached/Cached value for Load
321+
// operation
322+
#define SET_L1L3_CACHEREADHINT(cacheHint, l3CacheValue, uncachedVal, \
323+
cachedVal) \
324+
if (l3CacheValue == CacheReadHint::UNCACHED) \
325+
cacheHint = uncachedVal; \
326+
else if (l3CacheValue == CacheReadHint::CACHED) \
327+
cacheHint = cachedVal;
328+
329+
switch (l1CacheValue) {
330+
case CacheReadHint::UNCACHED:
331+
SET_L1L3_CACHEREADHINT(cacheHint, l3CacheValue, L1UC_L3UC, L1UC_L3C);
332+
break;
333+
case CacheReadHint::CACHED:
334+
SET_L1L3_CACHEREADHINT(cacheHint, l3CacheValue, L1C_L3UC, L1C_L3C);
335+
break;
336+
case CacheReadHint::STREAMING:
337+
SET_L1L3_CACHEREADHINT(cacheHint, l3CacheValue, L1S_L3UC, L1S_L3C);
338+
break;
339+
case CacheReadHint::READ_INVALIDATE:
340+
if (l3CacheValue == CacheReadHint::CACHED)
341+
cacheHint = L1IAR_L3C;
342+
break;
343+
}
344+
345+
} else {
346+
auto l1CacheValue = SET_CACHEVALUE(l1hint, CacheWriteHint::UNCACHED);
347+
auto l3CacheValue = SET_CACHEVALUE(l3hint, CacheWriteHint::UNCACHED);
348+
349+
// Setting Cache policy override based on L3 Uncached/Write-Back value for Store
350+
// operation
351+
#define SET_L1L3_CACHEWRITEHINT(cacheHint, l3CacheValue, uncachedVal, \
352+
cachedVal) \
353+
if (l3CacheValue == CacheWriteHint::UNCACHED) \
354+
cacheHint = uncachedVal; \
355+
else if (l3CacheValue == CacheWriteHint::WRITE_BACK) \
356+
cacheHint = cachedVal;
357+
358+
switch (l1CacheValue) {
359+
case CacheWriteHint::UNCACHED:
360+
SET_L1L3_CACHEWRITEHINT(cacheHint, l3CacheValue, L1UC_L3UC, L1UC_L3WB);
361+
break;
362+
case CacheWriteHint::WRITE_THROUGH:
363+
SET_L1L3_CACHEWRITEHINT(cacheHint, l3CacheValue, L1WT_L3UC, L1WT_L3WB);
364+
break;
365+
case CacheWriteHint::STREAMING:
366+
SET_L1L3_CACHEWRITEHINT(cacheHint, l3CacheValue, L1S_L3UC, L1S_L3WB);
367+
break;
368+
case CacheWriteHint::WRITE_BACK:
369+
if (l3CacheValue == CacheWriteHint::WRITE_BACK)
370+
cacheHint = L1WB_L3WB;
371+
break;
372+
}
373+
}
374+
return cacheHint;
375+
}
272376
class XeTypeConverter : public mlir::OneToNTypeConverter {
273377
public:
274378
// friend class XeConversionPattern;

lib/Conversion/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ add_subdirectory(GPUToGPUX)
66
add_subdirectory(GPUXToLLVM)
77
add_subdirectory(XeGPUToSPIRV)
88
add_subdirectory(XeTileToXeGPU)
9+
add_subdirectory(XeGPUToVC)

0 commit comments

Comments
 (0)