@@ -100,6 +100,17 @@ struct xe_oa_config_bo {
100
100
struct xe_bb * bb ;
101
101
};
102
102
103
+ struct xe_oa_fence {
104
+ /* @base: dma fence base */
105
+ struct dma_fence base ;
106
+ /* @lock: lock for the fence */
107
+ spinlock_t lock ;
108
+ /* @work: work to signal @base */
109
+ struct delayed_work work ;
110
+ /* @cb: callback to schedule @work */
111
+ struct dma_fence_cb cb ;
112
+ };
113
+
103
114
#define DRM_FMT (x ) DRM_XE_OA_FMT_TYPE_##x
104
115
105
116
static const struct xe_oa_format oa_formats [] = {
@@ -172,10 +183,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
172
183
return oa_config ;
173
184
}
174
185
175
- static void free_oa_config_bo (struct xe_oa_config_bo * oa_bo )
186
+ static void free_oa_config_bo (struct xe_oa_config_bo * oa_bo , struct dma_fence * last_fence )
176
187
{
177
188
xe_oa_config_put (oa_bo -> oa_config );
178
- xe_bb_free (oa_bo -> bb , NULL );
189
+ xe_bb_free (oa_bo -> bb , last_fence );
179
190
kfree (oa_bo );
180
191
}
181
192
@@ -655,7 +666,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
655
666
656
667
xe_oa_config_put (stream -> oa_config );
657
668
llist_for_each_entry_safe (oa_bo , tmp , stream -> oa_config_bos .first , node )
658
- free_oa_config_bo (oa_bo );
669
+ free_oa_config_bo (oa_bo , stream -> last_fence );
670
+ dma_fence_put (stream -> last_fence );
659
671
}
660
672
661
673
static void xe_oa_store_flex (struct xe_oa_stream * stream , struct xe_lrc * lrc ,
@@ -951,40 +963,113 @@ xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_c
951
963
return oa_bo ;
952
964
}
953
965
966
+ static void xe_oa_update_last_fence (struct xe_oa_stream * stream , struct dma_fence * fence )
967
+ {
968
+ dma_fence_put (stream -> last_fence );
969
+ stream -> last_fence = dma_fence_get (fence );
970
+ }
971
+
972
+ static void xe_oa_fence_work_fn (struct work_struct * w )
973
+ {
974
+ struct xe_oa_fence * ofence = container_of (w , typeof (* ofence ), work .work );
975
+
976
+ /* Signal fence to indicate new OA configuration is active */
977
+ dma_fence_signal (& ofence -> base );
978
+ dma_fence_put (& ofence -> base );
979
+ }
980
+
981
+ static void xe_oa_config_cb (struct dma_fence * fence , struct dma_fence_cb * cb )
982
+ {
983
+ /* Additional empirical delay needed for NOA programming after registers are written */
984
+ #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
985
+
986
+ struct xe_oa_fence * ofence = container_of (cb , typeof (* ofence ), cb );
987
+
988
+ INIT_DELAYED_WORK (& ofence -> work , xe_oa_fence_work_fn );
989
+ queue_delayed_work (system_unbound_wq , & ofence -> work ,
990
+ usecs_to_jiffies (NOA_PROGRAM_ADDITIONAL_DELAY_US ));
991
+ dma_fence_put (fence );
992
+ }
993
+
994
+ static const char * xe_oa_get_driver_name (struct dma_fence * fence )
995
+ {
996
+ return "xe_oa" ;
997
+ }
998
+
999
+ static const char * xe_oa_get_timeline_name (struct dma_fence * fence )
1000
+ {
1001
+ return "unbound" ;
1002
+ }
1003
+
1004
+ static const struct dma_fence_ops xe_oa_fence_ops = {
1005
+ .get_driver_name = xe_oa_get_driver_name ,
1006
+ .get_timeline_name = xe_oa_get_timeline_name ,
1007
+ };
1008
+
954
1009
static int xe_oa_emit_oa_config (struct xe_oa_stream * stream , struct xe_oa_config * config )
955
1010
{
956
1011
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
957
1012
struct xe_oa_config_bo * oa_bo ;
958
- int err = 0 , us = NOA_PROGRAM_ADDITIONAL_DELAY_US ;
1013
+ struct xe_oa_fence * ofence ;
1014
+ int i , err , num_signal = 0 ;
959
1015
struct dma_fence * fence ;
960
- long timeout ;
961
1016
962
- /* Emit OA configuration batch */
1017
+ ofence = kzalloc (sizeof (* ofence ), GFP_KERNEL );
1018
+ if (!ofence ) {
1019
+ err = - ENOMEM ;
1020
+ goto exit ;
1021
+ }
1022
+
963
1023
oa_bo = xe_oa_alloc_config_buffer (stream , config );
964
1024
if (IS_ERR (oa_bo )) {
965
1025
err = PTR_ERR (oa_bo );
966
1026
goto exit ;
967
1027
}
968
1028
1029
+ /* Emit OA configuration batch */
969
1030
fence = xe_oa_submit_bb (stream , XE_OA_SUBMIT_ADD_DEPS , oa_bo -> bb );
970
1031
if (IS_ERR (fence )) {
971
1032
err = PTR_ERR (fence );
972
1033
goto exit ;
973
1034
}
974
1035
975
- /* Wait till all previous batches have executed */
976
- timeout = dma_fence_wait_timeout (fence , false, 5 * HZ );
977
- dma_fence_put (fence );
978
- if (timeout < 0 )
979
- err = timeout ;
980
- else if (!timeout )
981
- err = - ETIME ;
982
- if (err )
983
- drm_dbg (& stream -> oa -> xe -> drm , "dma_fence_wait_timeout err %d\n" , err );
1036
+ /* Point of no return: initialize and set fence to signal */
1037
+ spin_lock_init (& ofence -> lock );
1038
+ dma_fence_init (& ofence -> base , & xe_oa_fence_ops , & ofence -> lock , 0 , 0 );
984
1039
985
- /* Additional empirical delay needed for NOA programming after registers are written */
986
- usleep_range (us , 2 * us );
1040
+ for (i = 0 ; i < stream -> num_syncs ; i ++ ) {
1041
+ if (stream -> syncs [i ].flags & DRM_XE_SYNC_FLAG_SIGNAL )
1042
+ num_signal ++ ;
1043
+ xe_sync_entry_signal (& stream -> syncs [i ], & ofence -> base );
1044
+ }
1045
+
1046
+ /* Additional dma_fence_get in case we dma_fence_wait */
1047
+ if (!num_signal )
1048
+ dma_fence_get (& ofence -> base );
1049
+
1050
+ /* Update last fence too before adding callback */
1051
+ xe_oa_update_last_fence (stream , fence );
1052
+
1053
+ /* Add job fence callback to schedule work to signal ofence->base */
1054
+ err = dma_fence_add_callback (fence , & ofence -> cb , xe_oa_config_cb );
1055
+ xe_gt_assert (stream -> gt , !err || err == - ENOENT );
1056
+ if (err == - ENOENT )
1057
+ xe_oa_config_cb (fence , & ofence -> cb );
1058
+
1059
+ /* If nothing needs to be signaled we wait synchronously */
1060
+ if (!num_signal ) {
1061
+ dma_fence_wait (& ofence -> base , false);
1062
+ dma_fence_put (& ofence -> base );
1063
+ }
1064
+
1065
+ /* Done with syncs */
1066
+ for (i = 0 ; i < stream -> num_syncs ; i ++ )
1067
+ xe_sync_entry_cleanup (& stream -> syncs [i ]);
1068
+ kfree (stream -> syncs );
1069
+
1070
+ return 0 ;
987
1071
exit :
1072
+ kfree (ofence );
988
1073
return err ;
989
1074
}
990
1075
0 commit comments