 #include "xe_gt_printk.h"
 #include "xe_gt_sriov_vf.h"
 #include "xe_guc.h"
+#include "xe_guc_pc.h"
 #include "xe_hw_engine_group.h"
 #include "xe_hwmon.h"
 #include "xe_irq.h"
@@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe)
 	xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
 }
 
-/**
- * xe_device_td_flush() - Flush transient L3 cache entries
- * @xe: The device
- *
- * Display engine has direct access to memory and is never coherent with L3/L4
- * caches (or CPU caches), however KMD is responsible for specifically flushing
- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
- * can happen from such a surface without seeing corruption.
- *
- * Display surfaces can be tagged as transient by mapping it using one of the
- * various L3:XD PAT index modes on Xe2.
- *
- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
- * Media is not coherent with L3 and we want to support render-vs-media
- * usescases. For other engines like copy/blt the HW internally forces uncached
- * behaviour, hence why we can skip the TDF on such platforms.
+/*
+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
  */
-void xe_device_td_flush(struct xe_device *xe)
+static void tdf_request_sync(struct xe_device *xe)
 {
-	struct xe_gt *gt;
 	unsigned int fw_ref;
+	struct xe_gt *gt;
 	u8 id;
 
-	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
-		return;
-
-	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
-		xe_device_l2_flush(xe);
-		return;
-	}
-
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
 			continue;
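After this hunk the per-GT flush loop survives as the new static tdf_request_sync() helper, while the platform checks and the kernel-doc move to a rebuilt xe_device_td_flush() wrapper added near the end of the file (see the last hunk below). Condensed from those hunks, the resulting flow is roughly the sketch below; the inline comments are editorial and the frequency-limit behaviour is inferred only from the helper names:

    void xe_device_td_flush(struct xe_device *xe)
    {
    	struct xe_gt *root_gt;

    	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
    		return;				/* TDF applies to discrete Xe2+ only */

    	root_gt = xe_root_mmio_gt(xe);
    	if (XE_WA(root_gt, 16023588340)) {
    		xe_device_l2_flush(xe);		/* WA: a transient flush is not sufficient */
    	} else {
    		/* presumably caps GT frequency for the duration of the flush */
    		xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
    		tdf_request_sync(xe);		/* per-GT TRANSIENT_FLUSH_REQUEST + wait */
    		xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
    	}
    }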
@@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe)
 			return;
 
 		xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+
 		/*
 		 * FIXME: We can likely do better here with our choice of
 		 * timeout. Currently we just assume the worst case, i.e. 150us,
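The context of this hunk ends inside the FIXME comment; the unchanged code that follows presumably polls XE2_TDF_CTRL with xe_mmio_wait32() until the request bit self-clears, along these lines (a sketch modelled on the wait pattern visible in xe_device_l2_flush() below, not a quote of the file):

    		/* Sketch only: wait up to the assumed 150us worst case for
    		 * TRANSIENT_FLUSH_REQUEST to clear; the exact arguments and
    		 * error message in the real file may differ.
    		 */
    		if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST,
    				   0, 150, NULL, false))
    			xe_gt_err_once(gt, "TD flush timeout\n");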
@@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe)
 		return;
 
 	spin_lock(&gt->global_invl_lock);
-	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
 
+	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
 	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
 		xe_gt_err_once(gt, "Global invalidation timeout\n");
+
 	spin_unlock(&gt->global_invl_lock);
 
 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 }
 
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping it using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * usescases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+	struct xe_gt *root_gt;
+
+	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+		return;
+
+	root_gt = xe_root_mmio_gt(xe);
+	if (XE_WA(root_gt, 16023588340)) {
+		/* A transient flush is not sufficient: flush the L2 */
+		xe_device_l2_flush(xe);
+	} else {
+		xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
+		tdf_request_sync(xe);
+		xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
+	}
+}
+
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 {
 	return xe_device_has_flat_ccs(xe) ?
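For context, a hypothetical caller sketch (not part of this commit) showing where xe_device_td_flush() fits: display code flushes transient L3 entries before scanning out from a surface mapped with one of the L3:XD PAT index modes, so the non-coherent display engine does not read stale data. The function name example_prepare_flip() and the surrounding flow are illustrative only:

    /* Hypothetical usage sketch, not taken from the xe tree. */
    static void example_prepare_flip(struct xe_device *xe)
    {
    	/* GPU writes to the scanout surface used a transient (L3:XD) PAT mode */
    	xe_device_td_flush(xe);		/* no-op on !DGFX or pre-Xe2 devices */
    	/* ...program the plane/flip from that surface here... */
    }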