Skip to content

Commit 8dcc435

Browse files
committed
Remove amx-transpose
1 parent ba4ed95 commit 8dcc435

File tree

1 file changed

+0
-319
lines changed
  • crates/core_arch/src/x86_64

1 file changed

+0
-319
lines changed

crates/core_arch/src/x86_64/amx.rs

Lines changed: 0 additions & 319 deletions
Original file line numberDiff line numberDiff line change
@@ -111,24 +111,6 @@ pub unsafe fn _tile_dpbf16ps<const DST: i32, const A: i32, const B: i32>() {
111111
tdpbf16ps(DST as i8, A as i8, B as i8);
112112
}
113113

114-
/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in tiles a and b,
115-
/// accumulating the intermediate single-precision (32-bit) floating-point elements
116-
/// with elements in dst, and store the 32-bit result back to tile dst.
117-
#[inline]
118-
#[rustc_legacy_const_generics(0, 1, 2)]
119-
#[target_feature(enable = "amx-bf16,amx-transpose")]
120-
#[cfg_attr(
121-
all(test, any(target_os = "linux", target_env = "msvc")),
122-
assert_instr(ttdpbf16ps, DST = 0, A = 1, B = 2)
123-
)]
124-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
125-
pub unsafe fn _tile_tdpbf16ps<const DST: i32, const A: i32, const B: i32>() {
126-
static_assert_uimm_bits!(DST, 3);
127-
static_assert_uimm_bits!(A, 3);
128-
static_assert_uimm_bits!(B, 3);
129-
ttdpbf16ps(DST as i8, A as i8, B as i8);
130-
}
131-
132114
/// Compute dot-product of bytes in tiles with a source/destination accumulator.
133115
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding
134116
/// signed 8-bit integers in b, producing 4 intermediate 32-bit results.
@@ -218,24 +200,6 @@ pub unsafe fn _tile_dpfp16ps<const DST: i32, const A: i32, const B: i32>() {
218200
tdpfp16ps(DST as i8, A as i8, B as i8);
219201
}
220202

221-
/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in tiles a and b,
222-
/// accumulating the intermediate single-precision (32-bit) floating-point elements
223-
/// with elements in dst, and store the 32-bit result back to tile dst.
224-
#[inline]
225-
#[rustc_legacy_const_generics(0, 1, 2)]
226-
#[target_feature(enable = "amx-fp16,amx-transpose")]
227-
#[cfg_attr(
228-
all(test, any(target_os = "linux", target_env = "msvc")),
229-
assert_instr(ttdpfp16ps, DST = 0, A = 1, B = 2)
230-
)]
231-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
232-
pub unsafe fn _tile_tdpfp16ps<const DST: i32, const A: i32, const B: i32>() {
233-
static_assert_uimm_bits!(DST, 3);
234-
static_assert_uimm_bits!(A, 3);
235-
static_assert_uimm_bits!(B, 3);
236-
ttdpfp16ps(DST as i8, A as i8, B as i8);
237-
}
238-
239203
/// Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile.
240204
/// Each dword element in input tiles a and b is interpreted as a complex number with FP16 real part and FP16 imaginary part.
241205
/// Calculates the imaginary part of the result. For each possible combination of (row of a, column of b),
@@ -278,87 +242,6 @@ pub unsafe fn _tile_cmmrlfp16ps<const DST: i32, const A: i32, const B: i32>() {
278242
tcmmrlfp16ps(DST as i8, A as i8, B as i8);
279243
}
280244

281-
/// Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile.
282-
/// Each dword element in input tiles a and b is interpreted as a complex number with FP16 real part and FP16 imaginary part.
283-
/// Calculates the imaginary part of the result. For each possible combination of (transposed column of a, column of b),
284-
/// it performs a set of multiplication and accumulations on all corresponding complex numbers (one from a and one from b).
285-
/// The imaginary part of the a element is multiplied with the real part of the corresponding b element, and the real part of
286-
/// the a element is multiplied with the imaginary part of the corresponding b elements. The two accumulated results are added,
287-
/// and then accumulated into the corresponding row and column of dst.
288-
#[inline]
289-
#[rustc_legacy_const_generics(0, 1, 2)]
290-
#[target_feature(enable = "amx-complex,amx-transpose")]
291-
#[cfg_attr(
292-
all(test, any(target_os = "linux", target_env = "msvc")),
293-
assert_instr(ttcmmimfp16ps, DST = 0, A = 1, B = 2)
294-
)]
295-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
296-
pub unsafe fn _tile_tcmmimfp16ps<const DST: i32, const A: i32, const B: i32>() {
297-
static_assert_uimm_bits!(DST, 3);
298-
static_assert_uimm_bits!(A, 3);
299-
static_assert_uimm_bits!(B, 3);
300-
ttcmmimfp16ps(DST as i8, A as i8, B as i8);
301-
}
302-
303-
/// Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile.
304-
/// Each dword element in input tiles a and b is interpreted as a complex number with FP16 real part and FP16 imaginary part.
305-
/// Calculates the real part of the result. For each possible combination of (transposed column of a, column of b),
306-
/// it performs a set of multiplication and accumulations on all corresponding complex numbers (one from a and one from b).
307-
/// The real part of the a element is multiplied with the real part of the corresponding b element, and the negated imaginary part of
308-
/// the a element is multiplied with the imaginary part of the corresponding b elements.
309-
/// The two accumulated results are added, and then accumulated into the corresponding row and column of dst.
310-
#[inline]
311-
#[rustc_legacy_const_generics(0, 1, 2)]
312-
#[target_feature(enable = "amx-complex,amx-transpose")]
313-
#[cfg_attr(
314-
all(test, any(target_os = "linux", target_env = "msvc")),
315-
assert_instr(ttcmmrlfp16ps, DST = 0, A = 1, B = 2)
316-
)]
317-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
318-
pub unsafe fn _tile_ttcmmrlfp16ps<const DST: i32, const A: i32, const B: i32>() {
319-
static_assert_uimm_bits!(DST, 3);
320-
static_assert_uimm_bits!(A, 3);
321-
static_assert_uimm_bits!(B, 3);
322-
ttcmmrlfp16ps(DST as i8, A as i8, B as i8);
323-
}
324-
325-
/// Perform matrix conjugate transpose and multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile.
326-
/// Each dword element in input tiles a and b is interpreted as a complex number with FP16 real part and FP16 imaginary part.
327-
/// Calculates the imaginary part of the result. For each possible combination of (transposed column of a, column of b),
328-
/// it performs a set of multiplication and accumulations on all corresponding complex numbers (one from a and one from b).
329-
/// The negated imaginary part of the a element is multiplied with the real part of the corresponding b element, and the real part of
330-
/// the a element is multiplied with the imaginary part of the corresponding b elements. The two accumulated results are added,
331-
/// and then accumulated into the corresponding row and column of dst.
332-
#[inline]
333-
#[rustc_legacy_const_generics(0, 1, 2)]
334-
#[target_feature(enable = "amx-complex,amx-transpose")]
335-
#[cfg_attr(
336-
all(test, any(target_os = "linux", target_env = "msvc")),
337-
assert_instr(tconjtcmmimfp16ps, DST = 0, A = 1, B = 2)
338-
)]
339-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
340-
pub unsafe fn _tile_conjtcmmimfp16ps<const DST: i32, const A: i32, const B: i32>() {
341-
static_assert_uimm_bits!(DST, 3);
342-
static_assert_uimm_bits!(A, 3);
343-
static_assert_uimm_bits!(B, 3);
344-
tconjtcmmimfp16ps(DST as i8, A as i8, B as i8);
345-
}
346-
347-
/// Perform a conjugate transpose of an FP16-pair of complex numbers in tile a, and store the result in tile dst.
348-
#[inline]
349-
#[rustc_legacy_const_generics(0, 1)]
350-
#[target_feature(enable = "amx-complex,amx-transpose")]
351-
#[cfg_attr(
352-
all(test, any(target_os = "linux", target_env = "msvc")),
353-
assert_instr(tconjtfp16, DST = 0, A = 1)
354-
)]
355-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
356-
pub unsafe fn _tile_conjtfp16<const DST: i32, const A: i32>() {
357-
static_assert_uimm_bits!(DST, 3);
358-
static_assert_uimm_bits!(A, 3);
359-
tconjtfp16(DST as i8, A as i8);
360-
}
361-
362245
/// Compute dot-product of BF8 (8-bit) floating-point pairs in tiles a and b,
363246
/// accumulating the intermediate single-precision (32-bit) floating-point elements
364247
/// with elements in dst, and store the 32-bit result back to tile dst.
@@ -497,176 +380,6 @@ pub unsafe fn _tile_mmultf32ps<const DST: i32, const A: i32, const B: i32>() {
497380
tmmultf32ps(DST as i8, A as i8, B as i8);
498381
}
499382

500-
/// Perform matrix transpose and multiplication of two tiles a and b, containing packed single precision (32-bit)
501-
/// floating-point elements, which are converted to TF32 (tensor-float32) format, and accumulate the
502-
/// results into a packed single precision tile.
503-
/// For each possible combination of (transposed column of a, column of b), it performs
504-
/// - convert to TF32
505-
/// - multiply the corresponding elements of a and b
506-
/// - accumulate the results into the corresponding row and column of dst using round-to-nearest-even
507-
/// rounding mode.
508-
/// Output FP32 denormals are always flushed to zero, input single precision denormals are always
509-
/// handled and *not* treated as zero.
510-
#[inline]
511-
#[rustc_legacy_const_generics(0, 1, 2)]
512-
#[target_feature(enable = "amx-tf32,amx-transpose")]
513-
#[cfg_attr(
514-
all(test, any(target_os = "linux", target_env = "msvc")),
515-
assert_instr(ttmmultf32ps, DST = 0, A = 1, B = 2)
516-
)]
517-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
518-
pub unsafe fn _tile_tmmultf32ps<const DST: i32, const A: i32, const B: i32>() {
519-
static_assert_uimm_bits!(DST, 3);
520-
static_assert_uimm_bits!(A, 3);
521-
static_assert_uimm_bits!(B, 3);
522-
ttmmultf32ps(DST as i8, A as i8, B as i8);
523-
}
524-
525-
/// Transposes 32-bit elements in tile a and stores the result in tile dst.
526-
#[inline]
527-
#[rustc_legacy_const_generics(0, 1)]
528-
#[target_feature(enable = "amx-transpose")]
529-
#[cfg_attr(
530-
all(test, any(target_os = "linux", target_env = "msvc")),
531-
assert_instr(ttransposed, DST = 0, A = 1)
532-
)]
533-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
534-
pub unsafe fn _tile_transposed<const DST: i32, const A: i32>() {
535-
static_assert_uimm_bits!(DST, 3);
536-
static_assert_uimm_bits!(A, 3);
537-
ttransposed(DST as i8, A as i8);
538-
}
539-
540-
/// TODO - Document
541-
#[inline]
542-
#[rustc_legacy_const_generics(0)]
543-
#[target_feature(enable = "amx-transpose")]
544-
#[cfg_attr(
545-
all(test, any(target_os = "linux", target_env = "msvc")),
546-
assert_instr(t2rpntlvwz0, DST = 0)
547-
)]
548-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
549-
pub unsafe fn _tile_2rpntlvwz0<const DST: i32>(base: *const u8, stride: usize) {
550-
static_assert_uimm_bits!(DST, 3);
551-
t2rpntlvwz0(DST as i8, base, stride);
552-
}
553-
554-
/// TODO - Document
555-
/// Provides a hint to the implementation that the data would be reused but does not need
556-
/// to be resident in the nearest cache levels.
557-
#[rustc_legacy_const_generics(0)]
558-
#[target_feature(enable = "amx-transpose")]
559-
#[cfg_attr(
560-
all(test, any(target_os = "linux", target_env = "msvc")),
561-
assert_instr(t2rpntlvwz0t1, DST = 0)
562-
)]
563-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
564-
pub unsafe fn _tile_2rpntlvwz0t1<const DST: i32>(base: *const u8, stride: usize) {
565-
static_assert_uimm_bits!(DST, 3);
566-
t2rpntlvwz0t1(DST as i8, base, stride);
567-
}
568-
569-
/// TODO - Document
570-
#[inline]
571-
#[rustc_legacy_const_generics(0)]
572-
#[target_feature(enable = "amx-transpose")]
573-
#[cfg_attr(
574-
all(test, any(target_os = "linux", target_env = "msvc")),
575-
assert_instr(t2rpntlvwz1, DST = 0)
576-
)]
577-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
578-
pub unsafe fn _tile_2rpntlvwz1<const DST: i32>(base: *const u8, stride: usize) {
579-
static_assert_uimm_bits!(DST, 3);
580-
t2rpntlvwz1(DST as i8, base, stride);
581-
}
582-
583-
/// TODO - Document
584-
/// Provides a hint to the implementation that the data would be reused but does not need
585-
/// to be resident in the nearest cache levels.
586-
#[inline]
587-
#[rustc_legacy_const_generics(0)]
588-
#[target_feature(enable = "amx-transpose")]
589-
#[cfg_attr(
590-
all(test, any(target_os = "linux", target_env = "msvc")),
591-
assert_instr(t2rpntlvwz1t1, DST = 0)
592-
)]
593-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
594-
pub unsafe fn _tile_2rpntlvwz1t1<const DST: i32>(base: *const u8, stride: usize) {
595-
static_assert_uimm_bits!(DST, 3);
596-
t2rpntlvwz1t1(DST as i8, base, stride);
597-
}
598-
599-
/// TODO - Document
600-
/// Additionally, this intrinsic indicates the source memory location is likely to become
601-
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
602-
/// before it is written, assuming it is ever written in the future.
603-
#[inline]
604-
#[rustc_legacy_const_generics(0)]
605-
#[target_feature(enable = "amx-transpose,amx-movrs")]
606-
#[cfg_attr(
607-
all(test, any(target_os = "linux", target_env = "msvc")),
608-
assert_instr(t2rpntlvwz0rs, DST = 0)
609-
)]
610-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
611-
pub unsafe fn _tile_2rpntlvwz0rs<const DST: i32>(base: *const u8, stride: usize) {
612-
static_assert_uimm_bits!(DST, 3);
613-
t2rpntlvwz0rs(DST as i8, base, stride);
614-
}
615-
616-
/// TODO - Document
617-
/// Provides a hint to the implementation that the data would be reused but does not need
618-
/// to be resident in the nearest cache levels.
619-
/// Additionally, this intrinsic indicates the source memory location is likely to become
620-
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
621-
/// before it is written, assuming it is ever written in the future.
622-
#[rustc_legacy_const_generics(0)]
623-
#[target_feature(enable = "amx-transpose,amx-movrs")]
624-
#[cfg_attr(
625-
all(test, any(target_os = "linux", target_env = "msvc")),
626-
assert_instr(t2rpntlvwz0rst1, DST = 0)
627-
)]
628-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
629-
pub unsafe fn _tile_2rpntlvwz0rst1<const DST: i32>(base: *const u8, stride: usize) {
630-
static_assert_uimm_bits!(DST, 3);
631-
t2rpntlvwz0rst1(DST as i8, base, stride);
632-
}
633-
634-
/// TODO - Document
635-
/// Additionally, this intrinsic indicates the source memory location is likely to become
636-
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
637-
/// before it is written, assuming it is ever written in the future.
638-
#[inline]
639-
#[rustc_legacy_const_generics(0)]
640-
#[target_feature(enable = "amx-transpose,amx-movrs")]
641-
#[cfg_attr(
642-
all(test, any(target_os = "linux", target_env = "msvc")),
643-
assert_instr(t2rpntlvwz1rs, DST = 0)
644-
)]
645-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
646-
pub unsafe fn _tile_2rpntlvwz1rs<const DST: i32>(base: *const u8, stride: usize) {
647-
static_assert_uimm_bits!(DST, 3);
648-
t2rpntlvwz1rs(DST as i8, base, stride);
649-
}
650-
651-
/// TODO - Document
652-
/// Provides a hint to the implementation that the data would be reused but does not need
653-
/// to be resident in the nearest cache levels.
654-
/// Additionally, this intrinsic indicates the source memory location is likely to become
655-
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
656-
/// before it is written, assuming it is ever written in the future.
657-
#[inline]
658-
#[rustc_legacy_const_generics(0)]
659-
#[target_feature(enable = "amx-transpose,amx-movrs")]
660-
#[cfg_attr(
661-
all(test, any(target_os = "linux", target_env = "msvc")),
662-
assert_instr(t2rpntlvwz1rst1, DST = 0)
663-
)]
664-
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
665-
pub unsafe fn _tile_2rpntlvwz1rst1<const DST: i32>(base: *const u8, stride: usize) {
666-
static_assert_uimm_bits!(DST, 3);
667-
t2rpntlvwz1rst1(DST as i8, base, stride);
668-
}
669-
670383
#[allow(improper_ctypes)]
671384
unsafe extern "C" {
672385
#[link_name = "llvm.x86.ldtilecfg"]
@@ -685,8 +398,6 @@ unsafe extern "C" {
685398
fn tilezero(dst: i8);
686399
#[link_name = "llvm.x86.tdpbf16ps"]
687400
fn tdpbf16ps(dst: i8, a: i8, b: i8);
688-
#[link_name = "llvm.x86.ttdpbf16ps"]
689-
fn ttdpbf16ps(dst: i8, a: i8, b: i8);
690401
#[link_name = "llvm.x86.tdpbuud"]
691402
fn tdpbuud(dst: i8, a: i8, b: i8);
692403
#[link_name = "llvm.x86.tdpbusd"]
@@ -697,20 +408,10 @@ unsafe extern "C" {
697408
fn tdpbssd(dst: i8, a: i8, b: i8);
698409
#[link_name = "llvm.x86.tdpfp16ps"]
699410
fn tdpfp16ps(dst: i8, a: i8, b: i8);
700-
#[link_name = "llvm.x86.ttdpfp16ps"]
701-
fn ttdpfp16ps(dst: i8, a: i8, b: i8);
702411
#[link_name = "llvm.x86.tcmmimfp16ps"]
703412
fn tcmmimfp16ps(dst: i8, a: i8, b: i8);
704413
#[link_name = "llvm.x86.tcmmrlfp16ps"]
705414
fn tcmmrlfp16ps(dst: i8, a: i8, b: i8);
706-
#[link_name = "llvm.x86.ttcmmimfp16ps"]
707-
fn ttcmmimfp16ps(dst: i8, a: i8, b: i8);
708-
#[link_name = "llvm.x86.ttcmmrlfp16ps"]
709-
fn ttcmmrlfp16ps(dst: i8, a: i8, b: i8);
710-
#[link_name = "llvm.x86.tconjtcmmimfp16ps"]
711-
fn tconjtcmmimfp16ps(dst: i8, a: i8, b: i8);
712-
#[link_name = "llvm.x86.tconjtfp16"]
713-
fn tconjtfp16(dst: i8, a: i8);
714415
#[link_name = "llvm.x86.tdpbf8ps"]
715416
fn tdpbf8ps(dst: i8, a: i8, b: i8);
716417
#[link_name = "llvm.x86.tdpbhf8ps"]
@@ -725,26 +426,6 @@ unsafe extern "C" {
725426
fn tileloaddrst164(dst: i8, base: *const u8, stride: usize);
726427
#[link_name = "llvm.x86.tmmultf32ps"]
727428
fn tmmultf32ps(dst: i8, a: i8, b: i8);
728-
#[link_name = "llvm.x86.ttmmultf32ps"]
729-
fn ttmmultf32ps(dst: i8, a: i8, b: i8);
730-
#[link_name = "llvm.x86.ttransposed"]
731-
fn ttransposed(dst: i8, a: i8);
732-
#[link_name = "llvm.x86.t2rpntlvwz0"]
733-
fn t2rpntlvwz0(dst: i8, base: *const u8, stride: usize);
734-
#[link_name = "llvm.x86.t2rpntlvwz0t1"]
735-
fn t2rpntlvwz0t1(dst: i8, base: *const u8, stride: usize);
736-
#[link_name = "llvm.x86.t2rpntlvwz1"]
737-
fn t2rpntlvwz1(dst: i8, base: *const u8, stride: usize);
738-
#[link_name = "llvm.x86.t2rpntlvwz1t1"]
739-
fn t2rpntlvwz1t1(dst: i8, base: *const u8, stride: usize);
740-
#[link_name = "llvm.x86.t2rpntlvwz0rs"]
741-
fn t2rpntlvwz0rs(dst: i8, base: *const u8, stride: usize);
742-
#[link_name = "llvm.x86.t2rpntlvwz0rst1"]
743-
fn t2rpntlvwz0rst1(dst: i8, base: *const u8, stride: usize);
744-
#[link_name = "llvm.x86.t2rpntlvwz1rs"]
745-
fn t2rpntlvwz1rs(dst: i8, base: *const u8, stride: usize);
746-
#[link_name = "llvm.x86.t2rpntlvwz1rst1"]
747-
fn t2rpntlvwz1rst1(dst: i8, base: *const u8, stride: usize);
748429
}
749430

750431
#[cfg(test)]

0 commit comments

Comments
 (0)