@@ -369,7 +369,7 @@ func.func @make_dma_descriptor_atomic_barrier(%base: !amdgpu.tdm_base<i32>, %bar
369369
370370// CHECK-LABEL: func @make_dma_descriptor_workgroup_mask
371371// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>, %[[WG_MASK:.+]]: i16, %[[TIMEOUT:.+]]: i1)
372- func.func @make_dma_descriptor_workgroup_mask (%base: !amdgpu.tdm_base <i32 >, %wg_mask: i16 , %timeout: i1 ) -> !amdgpu.tdm_descriptor < 2 > {
372+ func.func @make_dma_descriptor_workgroup_mask (%base: !amdgpu.tdm_base <i32 >, %wg_mask: i16 , %timeout: i1 ) -> !amdgpu.tdm_descriptor {
373373 // CHECK-DAG: %[[DGROUP0:.+]] = builtin.unrealized_conversion_cast %[[BASE]]
374374
375375 // CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
@@ -440,126 +440,107 @@ func.func @make_dma_descriptor_workgroup_mask(%base: !amdgpu.tdm_base<i32>, %wg_
440440 // CHECK: %[[DGROUP1:.+]] = llvm.insertelement %[[SGPR7]], %[[DGROUP1_6]][%[[C7]] : i32]
441441
442442 // CHECK: %[[DGROUPS:.+]] = builtin.unrealized_conversion_cast %[[DGROUP0]], %[[DGROUP1]] : vector<4xi32>, vector<8xi32> to !amdgpu.tdm_descriptor
443- %descriptor = amdgpu.make_dma_descriptor %base globalSize [128 , 64 ] globalStride [64 , 1 ] sharedSize [128 , 64 ] workgroupMask %wg_mask earlyTimeout %timeout : !amdgpu.tdm_base <i32 > -> !amdgpu.tdm_descriptor < 2 >
444- func.return %descriptor : !amdgpu.tdm_descriptor < 2 >
443+ %descriptor = amdgpu.make_dma_descriptor %base globalSize [128 , 64 ] globalStride [64 , 1 ] sharedSize [128 , 64 ] workgroupMask %wg_mask earlyTimeout %timeout : !amdgpu.tdm_base <i32 > -> !amdgpu.tdm_descriptor
444+ func.return %descriptor : !amdgpu.tdm_descriptor
445445}
446446
447- // CHECK-LABEL: func @tensor_load_to_lds_d2
448- // CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor<2> )
449- func.func @tensor_load_to_lds_d2 (%desc: !amdgpu.tdm_descriptor < 2 > ) {
450- // CHECK: %[[DGROUPS:.+]]:2 = builtin.unrealized_conversion_cast %[[DESC]]
451- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
452- amdgpu.tensor_load_to_lds %desc : !amdgpu.tdm_descriptor < 2 >
447+ // CHECK-LABEL: func @tensor_load_to_lds
448+ // CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
449+ func.func @tensor_load_to_lds (%desc: !amdgpu.tdm_descriptor ) {
450+ // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
451+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
452+ amdgpu.tensor_load_to_lds %desc : !amdgpu.tdm_descriptor
453453
454- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
455- amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <workgroup > } : !amdgpu.tdm_descriptor < 2 >
454+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
455+ amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <workgroup > } : !amdgpu.tdm_descriptor
456456
457- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 1 : vector<4xi32>, vector<8xi32>
458- amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <shader_engine > } : !amdgpu.tdm_descriptor < 2 >
457+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 1 : vector<4xi32>, vector<8xi32>
458+ amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <shader_engine > } : !amdgpu.tdm_descriptor
459459
460- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 2 : vector<4xi32>, vector<8xi32>
461- amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <device > } : !amdgpu.tdm_descriptor < 2 >
460+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 2 : vector<4xi32>, vector<8xi32>
461+ amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <device > } : !amdgpu.tdm_descriptor
462462
463- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 3 : vector<4xi32>, vector<8xi32>
464- amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <system > } : !amdgpu.tdm_descriptor < 2 >
463+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 3 : vector<4xi32>, vector<8xi32>
464+ amdgpu.tensor_load_to_lds %desc { cache_scope = #amdgpu.cache_scope <system > } : !amdgpu.tdm_descriptor
465465
466- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
467- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <regular > } : !amdgpu.tdm_descriptor < 2 >
466+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
467+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <regular > } : !amdgpu.tdm_descriptor
468468
469- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 4 : vector<4xi32>, vector<8xi32>
470- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <nontemporal > } : !amdgpu.tdm_descriptor < 2 >
469+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 4 : vector<4xi32>, vector<8xi32>
470+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <nontemporal > } : !amdgpu.tdm_descriptor
471471
472- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 8 : vector<4xi32>, vector<8xi32>
473- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <highpriority > } : !amdgpu.tdm_descriptor < 2 >
472+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 8 : vector<4xi32>, vector<8xi32>
473+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <highpriority > } : !amdgpu.tdm_descriptor
474474
475- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 12 : vector<4xi32>, vector<8xi32>
476- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <lastuse > } : !amdgpu.tdm_descriptor < 2 >
475+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 12 : vector<4xi32>, vector<8xi32>
476+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <lastuse > } : !amdgpu.tdm_descriptor
477477
478- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 16 : vector<4xi32>, vector<8xi32>
479- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <nontemporal_regular > } : !amdgpu.tdm_descriptor < 2 >
478+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 16 : vector<4xi32>, vector<8xi32>
479+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <nontemporal_regular > } : !amdgpu.tdm_descriptor
480480
481- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 20 : vector<4xi32>, vector<8xi32>
482- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <regular_nontemporal > } : !amdgpu.tdm_descriptor < 2 >
481+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 20 : vector<4xi32>, vector<8xi32>
482+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <regular_nontemporal > } : !amdgpu.tdm_descriptor
483483
484- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 24 : vector<4xi32>, vector<8xi32>
485- amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <nontemporal_highpriority > } : !amdgpu.tdm_descriptor < 2 >
484+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 24 : vector<4xi32>, vector<8xi32>
485+ amdgpu.tensor_load_to_lds %desc { temporal_hint = #amdgpu.temporal_load_hint <nontemporal_highpriority > } : !amdgpu.tdm_descriptor
486486
487- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
488- amdgpu.tensor_load_to_lds %desc { non_volatile = false } : !amdgpu.tdm_descriptor < 2 >
487+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
488+ amdgpu.tensor_load_to_lds %desc { non_volatile = false } : !amdgpu.tdm_descriptor
489489
490- // CHECK: rocdl.tensor.load.to.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 32 : vector<4xi32>, vector<8xi32>
491- amdgpu.tensor_load_to_lds %desc { non_volatile = true } : !amdgpu.tdm_descriptor < 2 >
490+ // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 32 : vector<4xi32>, vector<8xi32>
491+ amdgpu.tensor_load_to_lds %desc { non_volatile = true } : !amdgpu.tdm_descriptor
492492
493493 func.return
494494}
495495
496- // CHECK-LABEL: func @tensor_load_to_lds
497- // CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor<4> )
498- func.func @tensor_load_to_lds (%desc: !amdgpu.tdm_descriptor < 4 > ) {
496+ // CHECK-LABEL: func @tensor_store_from_lds
497+ // CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
498+ func.func @tensor_store_from_lds (%desc: !amdgpu.tdm_descriptor ) {
499499 // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
500- // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
501- amdgpu.tensor_load_to_lds %desc : !amdgpu.tdm_descriptor <4 >
502- func.return
503- }
504-
505- // CHECK-LABEL: func @tensor_store_from_lds_d2
506- // CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor<2>)
507- func.func @tensor_store_from_lds_d2 (%desc: !amdgpu.tdm_descriptor <2 >) {
508- // CHECK: %[[DGROUPS:.+]]:2 = builtin.unrealized_conversion_cast %[[DESC]]
509- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
510- amdgpu.tensor_store_from_lds %desc : !amdgpu.tdm_descriptor <2 >
511-
512- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
513- amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope <workgroup > } : !amdgpu.tdm_descriptor <2 >
514-
515- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 1 : vector<4xi32>, vector<8xi32>
516- amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope <shader_engine > } : !amdgpu.tdm_descriptor <2 >
517-
518- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 2 : vector<4xi32>, vector<8xi32>
519- amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope <device > } : !amdgpu.tdm_descriptor <2 >
500+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
501+ amdgpu.tensor_store_from_lds %desc : !amdgpu.tdm_descriptor
520502
521- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 3 : vector<4xi32>, vector<8xi32>
522- amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope <system > } : !amdgpu.tdm_descriptor < 2 >
503+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
504+ amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope <workgroup > } : !amdgpu.tdm_descriptor
523505
524- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
525- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint < regular > } : !amdgpu.tdm_descriptor < 2 >
506+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 1 : vector<4xi32>, vector<8xi32>
507+ amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope < shader_engine > } : !amdgpu.tdm_descriptor
526508
527- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 4 : vector<4xi32>, vector<8xi32>
528- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint < nontemporal > } : !amdgpu.tdm_descriptor < 2 >
509+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 2 : vector<4xi32>, vector<8xi32>
510+ amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope < device > } : !amdgpu.tdm_descriptor
529511
530- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 8 : vector<4xi32>, vector<8xi32>
531- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint < highpriority > } : !amdgpu.tdm_descriptor < 2 >
512+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 3 : vector<4xi32>, vector<8xi32>
513+ amdgpu.tensor_store_from_lds %desc { cache_scope = #amdgpu.cache_scope < system > } : !amdgpu.tdm_descriptor
532514
533- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 12 : vector<4xi32>, vector<8xi32>
534- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <writeback > } : !amdgpu.tdm_descriptor < 2 >
515+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
516+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <regular > } : !amdgpu.tdm_descriptor
535517
536- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 16 : vector<4xi32>, vector<8xi32>
537- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal_regular > } : !amdgpu.tdm_descriptor < 2 >
518+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 4 : vector<4xi32>, vector<8xi32>
519+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal > } : !amdgpu.tdm_descriptor
538520
539- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 20 : vector<4xi32>, vector<8xi32>
540- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <regular_nontemporal > } : !amdgpu.tdm_descriptor < 2 >
521+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 8 : vector<4xi32>, vector<8xi32>
522+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <highpriority > } : !amdgpu.tdm_descriptor
541523
542- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 24 : vector<4xi32>, vector<8xi32>
543- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal_highpriority > } : !amdgpu.tdm_descriptor < 2 >
524+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 12 : vector<4xi32>, vector<8xi32>
525+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <writeback > } : !amdgpu.tdm_descriptor
544526
545- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 28 : vector<4xi32>, vector<8xi32>
546- amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal_writeback > } : !amdgpu.tdm_descriptor < 2 >
527+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 16 : vector<4xi32>, vector<8xi32>
528+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal_regular > } : !amdgpu.tdm_descriptor
547529
548- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 0 : vector<4xi32>, vector<8xi32>
549- amdgpu.tensor_store_from_lds %desc { non_volatile = false } : !amdgpu.tdm_descriptor < 2 >
530+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 20 : vector<4xi32>, vector<8xi32>
531+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint < regular_nontemporal > } : !amdgpu.tdm_descriptor
550532
551- // CHECK: rocdl.tensor.store.from.lds.d2 %[[DGROUPS]]#0, %[[DGROUPS]]#1 cachepolicy 32 : vector<4xi32>, vector<8xi32>
552- amdgpu.tensor_store_from_lds %desc { non_volatile = true } : !amdgpu.tdm_descriptor <2 >
553- func.return
554- }
533+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 24 : vector<4xi32>, vector<8xi32>
534+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal_highpriority > } : !amdgpu.tdm_descriptor
555535
536+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 28 : vector<4xi32>, vector<8xi32>
537+ amdgpu.tensor_store_from_lds %desc { temporal_hint = #amdgpu.temporal_store_hint <nontemporal_writeback > } : !amdgpu.tdm_descriptor
556538
557- // CHECK-LABEL: func @tensor_store_from_lds
558- // CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor<4>)
559- func.func @tensor_store_from_lds (%desc: !amdgpu.tdm_descriptor <4 >) {
560- // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
561539 // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
562- amdgpu.tensor_store_from_lds %desc : !amdgpu.tdm_descriptor <4 >
540+ amdgpu.tensor_store_from_lds %desc { non_volatile = false } : !amdgpu.tdm_descriptor
541+
542+ // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 32 : vector<4xi32>, vector<8xi32>
543+ amdgpu.tensor_store_from_lds %desc { non_volatile = true } : !amdgpu.tdm_descriptor
563544 func.return
564545}
565546
0 commit comments