Reland "[X86][AMX] Support AMX-AVX512" #115581

phoebewang · 2024-11-09T05:00:38Z

Resolve compile fail without SSE2.

llvmbot · 2024-11-09T05:01:14Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang-driver

@llvm/pr-subscribers-mc

Author: Phoebe Wang (phoebewang)

Changes

Resolve compile fail without SSE2.

Patch is 82.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115581.diff

31 Files Affected:

(modified) clang/docs/ReleaseNotes.rst (+1)
(modified) clang/include/clang/Basic/BuiltinsX86_64.def (+13)
(modified) clang/include/clang/Driver/Options.td (+2)
(modified) clang/lib/Basic/Targets/X86.cpp (+6)
(modified) clang/lib/Basic/Targets/X86.h (+1)
(modified) clang/lib/Headers/CMakeLists.txt (+1)
(added) clang/lib/Headers/amxavx512intrin.h (+382)
(modified) clang/lib/Headers/immintrin.h (+4)
(modified) clang/lib/Sema/SemaX86.cpp (+6)
(added) clang/test/CodeGen/X86/amx_avx512_api.c (+52)
(added) clang/test/CodeGen/X86/amxavx512-builtins.c (+41)
(modified) clang/test/CodeGen/attr-target-x86.c (+4-4)
(modified) clang/test/Driver/x86-target-features.c (+7)
(modified) clang/test/Preprocessor/x86_target_features.c (+12)
(modified) llvm/include/llvm/IR/IntrinsicsX86.td (+51)
(modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+1)
(modified) llvm/lib/Target/X86/X86.td (+4)
(modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+62-5)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+76)
(modified) llvm/lib/Target/X86/X86InstrAMX.td (+147)
(modified) llvm/lib/Target/X86/X86InstrPredicates.td (+1)
(modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (+11)
(modified) llvm/lib/Target/X86/X86PreTileConfig.cpp (+15-3)
(modified) llvm/lib/TargetParser/Host.cpp (+1)
(modified) llvm/lib/TargetParser/X86TargetParser.cpp (+2)
(added) llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll (+171)
(added) llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll (+116)
(added) llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll (+61)
(added) llvm/test/MC/Disassembler/X86/amx-avx512.txt (+106)
(added) llvm/test/MC/X86/amx-avx512-att.s (+105)
(added) llvm/test/MC/X86/amx-avx512-intel.s (+105)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f82fbb73b12162..c3424e0e6f34c9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -739,6 +739,7 @@ X86 Support
   * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
 - Support ISA of ``AMX-FP8``.
 - Support ISA of ``AMX-TRANSPOSE``.
+- Support ISA of ``AMX-AVX512``.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index d95e8455a304b6..9f7462b1e0d962 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -133,6 +133,12 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z",
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tilemovrow_internal, "V16iUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
 // AMX
 TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
@@ -159,6 +165,13 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1, "vIUcvC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1, "vIUcvC*z", "n","amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_ttransposed, "vIUcIUc", "n", "amx-transpose")
 
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps, "V16fIUcUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h, "V32yIUcUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l, "V32yIUcUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh, "V32xIUcUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl, "V32xIUcUi", "n", "amx-avx512,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_tilemovrow, "V16iIUcUi", "n", "amx-avx512,avx10.2-512")
+
 TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")
 TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd")
 TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiSLLi*SLLiSLLiIi", "n", "cmpccxadd")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 8887e0c1495d2a..0dba5672c5a85d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6285,6 +6285,8 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
 def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
 def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
 def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
+def mamx_avx512 : Flag<["-"], "mamx-avx512">, Group<m_x86_Features_Group>;
+def mno_amx_avx512 : Flag<["-"], "mno-amx-avx512">, Group<m_x86_Features_Group>;
 def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
 def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
 def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d7d3adef42c79a..3c3dbfa13e452b 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -432,6 +432,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAMXFP8 = true;
     } else if (Feature == "+amx-transpose") {
       HasAMXTRANSPOSE = true;
+    } else if (Feature == "+amx-avx512") {
+      HasAMXAVX512 = true;
     } else if (Feature == "+cmpccxadd") {
       HasCMPCCXADD = true;
     } else if (Feature == "+raoint") {
@@ -955,6 +957,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AMX_FP8__");
   if (HasAMXTRANSPOSE)
     Builder.defineMacro("__AMX_TRANSPOSE__");
+  if (HasAMXAVX512)
+    Builder.defineMacro("__AMX_AVX512__");
   if (HasCMPCCXADD)
     Builder.defineMacro("__CMPCCXADD__");
   if (HasRAOINT)
@@ -1080,6 +1084,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
   return llvm::StringSwitch<bool>(Name)
       .Case("adx", true)
       .Case("aes", true)
+      .Case("amx-avx512", true)
       .Case("amx-bf16", true)
       .Case("amx-complex", true)
       .Case("amx-fp16", true)
@@ -1200,6 +1205,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Case("adx", HasADX)
       .Case("aes", HasAES)
+      .Case("amx-avx512", HasAMXAVX512)
       .Case("amx-bf16", HasAMXBF16)
       .Case("amx-complex", HasAMXCOMPLEX)
       .Case("amx-fp16", HasAMXFP16)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index e2eba63b992355..70047731b17295 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -159,6 +159,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAMXCOMPLEX = false;
   bool HasAMXFP8 = false;
   bool HasAMXTRANSPOSE = false;
+  bool HasAMXAVX512 = false;
   bool HasSERIALIZE = false;
   bool HasTSXLDTRK = false;
   bool HasUSERMSR = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 67242cd4d981bc..76366ca1f108e9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -146,6 +146,7 @@ set(x86_files
   adcintrin.h
   adxintrin.h
   ammintrin.h
+  amxavx512intrin.h
   amxcomplexintrin.h
   amxfp16intrin.h
   amxfp8intrin.h
diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
new file mode 100644
index 00000000000000..e4d77e503015af
--- /dev/null
+++ b/clang/lib/Headers/amxavx512intrin.h
@@ -0,0 +1,382 @@
+/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===------------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AMX_AVX512INTRIN_H
+#define __AMX_AVX512INTRIN_H
+#if defined(__x86_64__) && defined(__SSE2__)
+
+#define __DEFAULT_FN_ATTRS_AVX512                                              \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("amx-avx512,avx10.2-512")))
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the int32 source elements to fp32. The row of the tile is selected by a
+///    32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The row of the source tile
+#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by a 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2pbf16h(tsrc, row)                                       \
+  __builtin_ia32_tcvtrowps2pbf16h(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by a 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2pbf16l(tsrc, row)                                       \
+  __builtin_ia32_tcvtrowps2pbf16l(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by a 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by a 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)
+
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by a 32b GPR.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m512 _tile_movrow(__tile a, unsigned b);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param a
+///     The 1st source tile. Max size is 1024 Bytes.
+/// \param b
+///     The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///     The destination v16f32 data. Size is 64 Bytes.
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL>>3
+/// row_index := b&0xffff
+/// row_chunk := ((b>>16)&0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes-1)
+///     IF (row_chunk + i >= a.colsb)
+///             dst.byte[i] := 0
+///     ELSE
+///             dst.byte[i] := a.row[row_index].byte[row_chunk+i]
+/// ENDFOR
+/// \endcode
+#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b)
+
+/// This is internal intrinsic. C/C++ user should avoid calling it directly.
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
+                               _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
+                               _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
+}
+
+/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
+/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
+/// MXCSR.RC=RNE. Embedded rounding is not supported.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v16f32 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
+/// elements to bf16 at high 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32bf16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
+/// elements to bf16 at low 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32bf16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
+/// elements to fp16 at high 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32fp16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
+/// elements to fp16 at low 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32fp16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by a 32b GPR.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v16i32 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
+  return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+#endif // __x86_64__ && __SSE2__
+#endif // __AMX_AVX512INTRIN_H
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 4bf7eac4195eec..bc240e28d59142 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -656,6 +656,10 @@ _storebe_i64(void * __P, long long __D) {
 #include <amxtransposeintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
+#include <amxavx512intrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) ||                             \
     defined(__AVX512VP2INTERSECT__)
 #include <avx512vp2intersectintrin.h>
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index ef878d16d445f...
[truncated]

github-actions · 2024-11-09T05:03:53Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff 5e02fd8d0b3c6638220c95e997c43fdc9d7ded3c 44501d8d8a7096decc236fc41f296ae014cec991 --extensions c,cpp,h -- clang/lib/Headers/amxavx512intrin.h clang/test/CodeGen/X86/amx_avx512_api.c clang/test/CodeGen/X86/amxavx512-builtins.c clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp clang/test/CodeGen/attr-target-x86.c clang/test/Driver/x86-target-features.c clang/test/Preprocessor/x86_target_features.c llvm/lib/Target/X86/X86ExpandPseudo.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86LowerAMXType.cpp llvm/lib/Target/X86/X86PreTileConfig.cpp llvm/lib/TargetParser/Host.cpp llvm/lib/TargetParser/X86TargetParser.cpp

View the diff from clang-format here.

diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
index e4d77e5030..14f3759192 100644
--- a/clang/lib/Headers/amxavx512intrin.h
+++ b/clang/lib/Headers/amxavx512intrin.h
@@ -37,7 +37,8 @@
 ///     IF i + row_chunk / 4 >= tsrc.colsb / 4
 ///         dst.dword[i] := 0
 ///     ELSE
-///         dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
+///         dst.f32[i] :=
+///         CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -73,7 +74,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+0] := 0
-///         dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.bf16[2*i+1] :=
+///         CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -110,7 +112,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+1] := 0
-///         dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.bf16[2*i+0] :=
+///         CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -147,7 +150,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+0] := 0
-///         dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.fp16[2*i+1] :=
+///         CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -183,7 +187,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+1] := 0
-///         dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.fp16[2*i+0] :=
+///         CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0

llvm-ci · 2024-11-09T06:31:58Z

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve-vls running on linaro-g3-04 while building clang,llvm at step 7 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/143/builds/3324

Here is the relevant piece of the build log for the reference

Step 7 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'AddressSanitizer-Unit :: ./Asan-aarch64-inline-Dynamic-Test/66/95' FAILED ********************
Script(shard):
--
GTEST_OUTPUT=json:/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1/runtimes/runtimes-bins/compiler-rt/lib/asan/tests/AARCH64LinuxDynamicConfig/./Asan-aarch64-inline-Dynamic-Test-AddressSanitizer-Unit-4088106-66-95.json GTEST_SHUFFLE=0 GTEST_TOTAL_SHARDS=95 GTEST_SHARD_INDEX=66 /home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1/runtimes/runtimes-bins/compiler-rt/lib/asan/tests/AARCH64LinuxDynamicConfig/./Asan-aarch64-inline-Dynamic-Test
--

Note: This is test shard 67 of 95.
[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from AddressSanitizer
[ RUN      ] AddressSanitizer.OOB_int

--
exit: -6
--
shard JSON output does not exist: /home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1/runtimes/runtimes-bins/compiler-rt/lib/asan/tests/AARCH64LinuxDynamicConfig/./Asan-aarch64-inline-Dynamic-Test-AddressSanitizer-Unit-4088106-66-95.json
********************

Step 13 (test-suite) failure: 1200 seconds without output running [b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/sandbox/bin/python', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/sandbox/bin/lnt', b'runtest', b'test-suite', b'--no-timestamp', b'--sandbox', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/sandbox', b'--test-suite', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/test-suite', b'--cc', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1.install/bin/clang', b'--cxx', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1.install/bin/clang++', b'--use-lit', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1/bin/llvm-lit', b'--build-tool-options', b'"-k"', b'--cmake-define=TEST_SUITE_FORTRAN:STRING=ON', b'--cmake-define=CMAKE_Fortran_COMPILER=/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1.install/bin/flang', b'--cppflags', b'-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false -O3', b'--threads=32', b'--build-threads=32'], attempting to kill
...
-- Looking for __init_cpu_features - not found
-- Looking for __ARM_NEON
-- Looking for __ARM_NEON - found
-- No reference output found for test aarch64_neon_intrinsics
-- Found compiler-rt builtin library: libclang_rt.builtins.a
-- Performing Test COMPILER_HAS_MATRIX_FLAG
-- Performing Test COMPILER_HAS_MATRIX_FLAG - Success
-- Looking for mmap
-- Looking for mmap - found
-- Configuring done (34.7s)
command timed out: 1200 seconds without output running [b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/sandbox/bin/python', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/sandbox/bin/lnt', b'runtest', b'test-suite', b'--no-timestamp', b'--sandbox', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/sandbox', b'--test-suite', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/test/test-suite', b'--cc', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1.install/bin/clang', b'--cxx', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1.install/bin/clang++', b'--use-lit', b'/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1/bin/llvm-lit', b'--build-tool-options', b'"-k"', b'--cmake-define=TEST_SUITE_FORTRAN:STRING=ON', b'--cmake-define=CMAKE_Fortran_COMPILER=/home/tcwg-buildbot/worker/clang-aarch64-sve-vls/stage1.install/bin/flang', b'--cppflags', b'-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false -O3', b'--threads=32', b'--build-threads=32'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1238.980044

llvm-ci · 2024-11-09T06:57:15Z

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve-vla-2stage running on linaro-g3-03 while building clang,llvm at step 11 "build stage 2".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/41/builds/3303

Here is the relevant piece of the build log for the reference

Step 11 (build stage 2) failure: 'ninja' (failure)
...
[7886/8738] Building CXX object tools/flang/lib/Lower/CMakeFiles/FortranLower.dir/ConvertExpr.cpp.o
[7887/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/canonicalize-do.cpp.o
[7888/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-case.cpp.o
[7889/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-allocate.cpp.o
[7890/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-arithmeticif.cpp.o
[7891/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-deallocate.cpp.o
[7892/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/canonicalize-directives.cpp.o
[7893/8738] Building CXX object tools/flang/lib/Parser/CMakeFiles/FortranParser.dir/parse-tree.cpp.o
[7894/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-cuda.cpp.o
[7895/8738] Building CXX object tools/flang/lib/Lower/CMakeFiles/FortranLower.dir/OpenMP/OpenMP.cpp.o
FAILED: tools/flang/lib/Lower/CMakeFiles/FortranLower.dir/OpenMP/OpenMP.cpp.o 
/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang++ -DFLANG_INCLUDE_TESTS=1 -DFLANG_LITTLE_ENDIAN=1 -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/tools/flang/lib/Lower -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/flang/lib/Lower -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/flang/include -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/tools/flang/include -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/include -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/include -isystem /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/../mlir/include -isystem /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/tools/mlir/include -isystem /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/tools/clang/include -isystem /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/../clang/include -mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Wno-deprecated-copy -Wno-string-conversion -Wno-ctad-maybe-unsupported -Wno-unused-command-line-argument -Wstring-conversion           -Wcovered-switch-default -Wno-nested-anon-types -O3 -DNDEBUG -std=c++17  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT tools/flang/lib/Lower/CMakeFiles/FortranLower.dir/OpenMP/OpenMP.cpp.o -MF tools/flang/lib/Lower/CMakeFiles/FortranLower.dir/OpenMP/OpenMP.cpp.o.d -o tools/flang/lib/Lower/CMakeFiles/FortranLower.dir/OpenMP/OpenMP.cpp.o -c /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/flang/lib/Lower/OpenMP/OpenMP.cpp
Killed
[7896/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-if-stmt.cpp.o
[7897/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-namelist.cpp.o
[7898/8738] Building CXX object tools/flang/lib/Parser/CMakeFiles/FortranParser.dir/program-parsers.cpp.o
[7899/8738] Building CXX object tools/flang/lib/Parser/CMakeFiles/FortranParser.dir/openacc-parsers.cpp.o
[7900/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-stop.cpp.o
[7901/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/assignment.cpp.o
[7902/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-return.cpp.o
[7903/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-nullify.cpp.o
[7904/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-io.cpp.o
[7905/8738] Building CXX object tools/flang/lib/Parser/CMakeFiles/FortranParser.dir/unparse.cpp.o
[7906/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/compute-offsets.cpp.o
[7907/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-coarray.cpp.o
[7908/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/canonicalize-acc.cpp.o
[7909/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-declarations.cpp.o
[7910/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-purity.cpp.o
[7911/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-data.cpp.o
[7912/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/program-tree.cpp.o
[7913/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-select-rank.cpp.o
[7914/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-select-type.cpp.o
[7915/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-call.cpp.o
[7916/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/definable.cpp.o
[7917/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/data-to-inits.cpp.o
[7918/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/rewrite-directives.cpp.o
[7919/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/scope.cpp.o
[7920/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/mod-file.cpp.o
[7921/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/resolve-labels.cpp.o
[7922/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/resolve-names-utils.cpp.o
[7923/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-acc-structure.cpp.o
[7924/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/runtime-type-info.cpp.o
[7925/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-do-forall.cpp.o
[7926/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/check-omp-structure.cpp.o
[7927/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/rewrite-parse-tree.cpp.o
[7928/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/pointer-assignment.cpp.o
[7929/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/resolve-directives.cpp.o
[7930/8738] Building CXX object tools/flang/lib/Parser/CMakeFiles/FortranParser.dir/openmp-parsers.cpp.o
[7931/8738] Building CXX object tools/flang/lib/Semantics/CMakeFiles/FortranSemantics.dir/expression.cpp.o

Resolve compile fail without SSE2.

phoebewang added 2 commits November 9, 2024 12:48

Reland "[X86][AMX] Support AMX-AVX512"

54b3ba2

Resolve compile fail without SSE2.

Resolve compile fail without SSE2

44501d8

phoebewang merged commit 8f44013 into llvm:main Nov 9, 2024
13 of 17 checks passed

phoebewang deleted the AMX-AVX512 branch November 9, 2024 05:26

phoebewang mentioned this pull request Nov 9, 2024

[X86][AMX] Support AMX-AVX512 #114070

Merged

Groverkss pushed a commit to iree-org/llvm-project that referenced this pull request Nov 15, 2024

Reland "[X86][AMX] Support AMX-AVX512" (llvm#115581)

e1d8a90

Resolve compile fail without SSE2.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Reland "[X86][AMX] Support AMX-AVX512" #115581

Reland "[X86][AMX] Support AMX-AVX512" #115581

Uh oh!

phoebewang commented Nov 9, 2024

Uh oh!

llvmbot commented Nov 9, 2024 •

edited

Loading

Uh oh!

github-actions bot commented Nov 9, 2024

Uh oh!

Uh oh!

llvm-ci commented Nov 9, 2024

Uh oh!

llvm-ci commented Nov 9, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

Reland "[X86][AMX] Support AMX-AVX512" #115581

Reland "[X86][AMX] Support AMX-AVX512" #115581

Uh oh!

Conversation

phoebewang commented Nov 9, 2024

Uh oh!

llvmbot commented Nov 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Nov 9, 2024

Uh oh!

Uh oh!

llvm-ci commented Nov 9, 2024

Uh oh!

llvm-ci commented Nov 9, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

llvmbot commented Nov 9, 2024 •

edited

Loading