Skip to content

Commit 4708dcb

Browse files
authored
Merge branch 'main' into wcsspn-cspn-fixes
2 parents 268a00e + 9052a85 commit 4708dcb

File tree

9 files changed

+97
-15
lines changed

9 files changed

+97
-15
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1486,7 +1486,6 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
14861486
}
14871487

14881488
bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
1489-
assert(DemandedVLs.size() == 0);
14901489
if (skipFunction(MF.getFunction()))
14911490
return false;
14921491

@@ -1499,6 +1498,8 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
14991498

15001499
TII = ST.getInstrInfo();
15011500

1501+
assert(DemandedVLs.empty());
1502+
15021503
// For each instruction that defines a vector, compute what VL its
15031504
// downstream users demand.
15041505
for (MachineBasicBlock *MBB : post_order(&MF)) {

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -550,8 +550,8 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
550550
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %deinterleaved.results
551551
}
552552

553-
define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) {
554-
; CHECK-LABEL: masked_loat_factor4:
553+
define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4(ptr %p) {
554+
; CHECK-LABEL: masked_load_factor4:
555555
; CHECK: # %bb.0:
556556
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
557557
; CHECK-NEXT: vlseg4e8.v v8, (a0)
@@ -561,8 +561,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i
561561
ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
562562
}
563563

564-
define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
565-
; CHECK-LABEL: masked_loat_factor4_mask:
564+
define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
565+
; CHECK-LABEL: masked_load_factor4_mask:
566566
; CHECK: # %bb.0:
567567
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
568568
; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
@@ -575,8 +575,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i
575575

576576
; Negative test - some of the deinterleaved elements might come from the
577577
; passthru not the load
578-
define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
579-
; CHECK-LABEL: masked_loat_factor4_passthru:
578+
define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
579+
; CHECK-LABEL: masked_load_factor4_passthru:
580580
; CHECK: # %bb.0:
581581
; CHECK-NEXT: addi sp, sp, -16
582582
; CHECK-NEXT: .cfi_def_cfa_offset 16

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
1717
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
1818
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
19+
#include "mlir/Dialect/Utils/IndexingUtils.h"
1920
#include "mlir/IR/Builders.h"
2021
#include "mlir/IR/BuiltinTypes.h"
2122
#include "mlir/IR/Diagnostics.h"
@@ -89,7 +90,22 @@ static FailureOr<MemRefType> getFatRawBufferTypeLike(MemRefType source,
8990
auto stridedLayout = dyn_cast<StridedLayoutAttr>(layout);
9091
if (!stridedLayout)
9192
return failure();
92-
mb.setLayout(StridedLayoutAttr::get(ctx, 0, stridedLayout.getStrides()));
93+
MemRefLayoutAttrInterface newLayout =
94+
StridedLayoutAttr::get(ctx, 0, stridedLayout.getStrides());
95+
// Special case: if resetting the offset causes the strided layout to become
96+
// the identity layout, then reset to the identity layout.
97+
// TODO: this'll get a lot simpler when we have the contiguous layout.
98+
SmallVector<int64_t> stridesIfIdentity;
99+
if (source.hasStaticShape()) {
100+
stridesIfIdentity = computeSuffixProduct(source.getShape());
101+
} else if (source.getRank() <= 1) {
102+
stridesIfIdentity = SmallVector<int64_t>(source.getRank(), 1);
103+
}
104+
if (stridesIfIdentity == stridedLayout.getStrides()) {
105+
newLayout = AffineMapAttr::get(
106+
AffineMap::getMultiDimIdentityMap(source.getRank(), ctx));
107+
}
108+
mb.setLayout(newLayout);
93109
}
94110
return (MemRefType)(mb);
95111
}

mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ func.func @fat_raw_buffer_cast_dyn_size_offset(%buf: memref<?xi32, strided<[1],
6868
}
6969

7070
// CHECK-LABEL: func @fat_raw_buffer_cast_reset_offset
71-
func.func @fat_raw_buffer_cast_reset_offset(%buf: memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace>) -> memref<?xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>> {
71+
func.func @fat_raw_buffer_cast_reset_offset(%buf: memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace>) -> memref<?xi32, #amdgpu.address_space<fat_raw_buffer>> {
7272
// CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<?xi32, strided<[1], offset: ?>, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<1 x i64>, array<1 x i64>)>
7373
// CHECK-DAG: %[[memRefPtr:.*]] = llvm.extractvalue %[[desc]][1]
7474
// CHECK-DAG: %[[memRefOff:.*]] = llvm.extractvalue %[[desc]][2]
@@ -77,8 +77,8 @@ func.func @fat_raw_buffer_cast_reset_offset(%buf: memref<?xi32, strided<[1], off
7777
// CHECK: %[[fatBuf:.*]] = rocdl.make.buffer.rsrc %[[basePtr]], %{{.*}}, %{{.*}}, %{{.*}}
7878
// CHECK: llvm.insertvalue %[[fatBuf]], %{{.*}}[1]
7979
// CHECK: llvm.insertvalue %[[zeroOff]], %{{.*}}[2]
80-
%ret = amdgpu.fat_raw_buffer_cast %buf resetOffset : memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace> to memref<?xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>>
81-
return %ret : memref<?xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>>
80+
%ret = amdgpu.fat_raw_buffer_cast %buf resetOffset : memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace> to memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
81+
return %ret : memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
8282
}
8383

8484
// CHECK-LABEL: func @fat_raw_buffer_cast_valid_bytes

mlir/test/Dialect/AMDGPU/ops.mlir

Lines changed: 47 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -360,10 +360,54 @@ func.func @fat_raw_buffer_cast_easy(%m: memref<8xi32>) -> memref<8xi32, #amdgpu.
360360
// CHECK-SAME: cacheSwizzleStride(%{{[^)]*}})
361361
// CHECK-SAME: boundsCheck(false)
362362
// CHECK-SAME: resetOffset
363-
func.func @fat_raw_buffer_cast(%m: memref<8xi32, strided<[1], offset: ?>>, %validBytes: i32, %cacheSwizzle: i14) -> memref<8xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>> {
363+
func.func @fat_raw_buffer_cast(%m: memref<8xi32, strided<[1], offset: ?>>, %validBytes: i32, %cacheSwizzle: i14) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
364364
%ret = amdgpu.fat_raw_buffer_cast %m validBytes(%validBytes) cacheSwizzleStride(%cacheSwizzle) boundsCheck(false) resetOffset
365-
: memref<8xi32, strided<[1], offset: ?>> to memref<8xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>>
366-
func.return %ret : memref<8xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>>
365+
: memref<8xi32, strided<[1], offset: ?>> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
366+
func.return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
367+
}
368+
369+
// CHECK-LABEL: func @fat_raw_buffer_cast_dynamic_1d_reset_offset
370+
// CHECK: amdgpu.fat_raw_buffer_cast
371+
func.func @fat_raw_buffer_cast_dynamic_1d_reset_offset(%m: memref<?xi32, strided<[1], offset: ?>>) -> memref<?xi32, #amdgpu.address_space<fat_raw_buffer>> {
372+
%ret = amdgpu.fat_raw_buffer_cast %m resetOffset
373+
: memref<?xi32, strided<[1], offset: ?>> to memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
374+
func.return %ret : memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
375+
}
376+
377+
// CHECK-LABEL: func @fat_raw_buffer_cast_dynamic_0d_reset_offset
378+
// CHECK: %[[ret:.+]] = amdgpu.fat_raw_buffer_cast
379+
// CHECK: return %[[ret]]
380+
func.func @fat_raw_buffer_cast_dynamic_0d_reset_offset(%m: memref<i32, strided<[], offset: ?>>) -> memref<i32, #amdgpu.address_space<fat_raw_buffer>> {
381+
%ret = amdgpu.fat_raw_buffer_cast %m resetOffset
382+
: memref<i32, strided<[], offset: ?>> to memref<i32, #amdgpu.address_space<fat_raw_buffer>>
383+
func.return %ret : memref<i32, #amdgpu.address_space<fat_raw_buffer>>
384+
}
385+
386+
// CHECK-LABEL: func @fat_raw_buffer_cast_static_shape_2d_reset_offset
387+
// CHECK: %[[ret:.+]] = amdgpu.fat_raw_buffer_cast
388+
// CHECK: return %[[ret]]
389+
func.func @fat_raw_buffer_cast_static_shape_2d_reset_offset(%m: memref<4x4xi32, strided<[4, 1], offset: ?>>) -> memref<4x4xi32, #amdgpu.address_space<fat_raw_buffer>> {
390+
%ret = amdgpu.fat_raw_buffer_cast %m resetOffset
391+
: memref<4x4xi32, strided<[4, 1], offset: ?>> to memref<4x4xi32, #amdgpu.address_space<fat_raw_buffer>>
392+
func.return %ret : memref<4x4xi32, #amdgpu.address_space<fat_raw_buffer>>
393+
}
394+
395+
// CHECK-LABEL: func @fat_raw_buffer_cast_dynamic_2d_reset_offset
396+
// CHECK: %[[ret:.+]] = amdgpu.fat_raw_buffer_cast
397+
// CHECK: return %[[ret]]
398+
func.func @fat_raw_buffer_cast_dynamic_2d_reset_offset(%m: memref<?x?xi32, strided<[?, 1], offset: ?>>) -> memref<?x?xi32, strided<[?, 1]>, #amdgpu.address_space<fat_raw_buffer>> {
399+
%ret = amdgpu.fat_raw_buffer_cast %m resetOffset
400+
: memref<?x?xi32, strided<[?, 1], offset: ?>> to memref<?x?xi32, strided<[?, 1]>, #amdgpu.address_space<fat_raw_buffer>>
401+
func.return %ret : memref<?x?xi32, strided<[?, 1]>, #amdgpu.address_space<fat_raw_buffer>>
402+
}
403+
404+
// CHECK-LABEL: func @fat_raw_buffer_cast_noncontiguous_2d_reset_offset
405+
// CHECK: %[[ret:.+]] = amdgpu.fat_raw_buffer_cast
406+
// CHECK: return %[[ret]]
407+
func.func @fat_raw_buffer_cast_noncontiguous_2d_reset_offset(%m: memref<4x4xi32, strided<[8, 1], offset: ?>>) -> memref<4x4xi32, strided<[8, 1]>, #amdgpu.address_space<fat_raw_buffer>> {
408+
%ret = amdgpu.fat_raw_buffer_cast %m resetOffset
409+
: memref<4x4xi32, strided<[8, 1], offset: ?>> to memref<4x4xi32, strided<[8, 1]>, #amdgpu.address_space<fat_raw_buffer>>
410+
func.return %ret : memref<4x4xi32, strided<[8, 1]>, #amdgpu.address_space<fat_raw_buffer>>
367411
}
368412

369413
// CHECK-LABEL: func @raw_buffer_load_f32_from_rank_1

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3515,6 +3515,14 @@ libc_math_function(
35153515
],
35163516
)
35173517

3518+
libc_math_function(
3519+
name = "hypotf16",
3520+
additional_deps = [
3521+
":__support_fputil_multiply_add",
3522+
":__support_fputil_sqrt",
3523+
],
3524+
)
3525+
35183526
libc_math_function(name = "ilogb")
35193527

35203528
libc_math_function(name = "ilogbf")

utils/bazel/llvm-project-overlay/libc/test/libc_test_rules.bzl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@ def libc_test(
6262
name = name,
6363
local_defines = local_defines + LIBC_CONFIGURE_OPTIONS,
6464
deps = deps,
65-
copts = copts + libc_common_copts(),
65+
# For complex floating point literals.
66+
copts = copts + libc_common_copts() + ["-fext-numeric-literals"],
6667
linkstatic = 1,
6768
**kwargs
6869
)

utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -286,6 +286,13 @@ math_mpfr_test(
286286
],
287287
)
288288

289+
math_mpfr_test(
290+
name = "hypotf16",
291+
hdrs = [
292+
"HypotTest.h",
293+
],
294+
)
295+
289296
math_mpfr_test(
290297
name = "llrint",
291298
hdrs = ["RoundToIntegerTest.h"],

utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -898,6 +898,11 @@ math_test(
898898
hdrs = ["HypotTest.h"],
899899
)
900900

901+
math_test(
902+
name = "hypotf16",
903+
hdrs = ["HypotTest.h"],
904+
)
905+
901906
math_test(
902907
name = "ilogb",
903908
hdrs = ["ILogbTest.h"],

0 commit comments

Comments
 (0)