@@ -100,6 +100,17 @@ struct xe_oa_config_bo {
 	struct xe_bb *bb;
 };
 
+struct xe_oa_fence {
+	/* @base: dma fence base */
+	struct dma_fence base;
+	/* @lock: lock for the fence */
+	spinlock_t lock;
+	/* @work: work to signal @base */
+	struct delayed_work work;
+	/* @cb: callback to schedule @work */
+	struct dma_fence_cb cb;
+};
+
 #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
 
 static const struct xe_oa_format oa_formats[] = {
@@ -172,10 +183,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
 	return oa_config;
 }
 
-static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
 {
 	xe_oa_config_put(oa_bo->oa_config);
-	xe_bb_free(oa_bo->bb, NULL);
+	xe_bb_free(oa_bo->bb, last_fence);
 	kfree(oa_bo);
 }
 
@@ -655,7 +666,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
 
 	xe_oa_config_put(stream->oa_config);
 	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
-		free_oa_config_bo(oa_bo);
+		free_oa_config_bo(oa_bo, stream->last_fence);
+	dma_fence_put(stream->last_fence);
 }
 
 static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
@@ -951,40 +963,113 @@ xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_c
 	return oa_bo;
 }
 
+static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
+{
+	dma_fence_put(stream->last_fence);
+	stream->last_fence = dma_fence_get(fence);
+}
+
+static void xe_oa_fence_work_fn(struct work_struct *w)
+{
+	struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
+
+	/* Signal fence to indicate new OA configuration is active */
+	dma_fence_signal(&ofence->base);
+	dma_fence_put(&ofence->base);
+}
+
+static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	/* Additional empirical delay needed for NOA programming after registers are written */
+#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
+
+	struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
+
+	INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
+	queue_delayed_work(system_unbound_wq, &ofence->work,
+			   usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
+	dma_fence_put(fence);
+}
+
+static const char *xe_oa_get_driver_name(struct dma_fence *fence)
+{
+	return "xe_oa";
+}
+
+static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
+{
+	return "unbound";
+}
+
+static const struct dma_fence_ops xe_oa_fence_ops = {
+	.get_driver_name = xe_oa_get_driver_name,
+	.get_timeline_name = xe_oa_get_timeline_name,
+};
+
 static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
 {
 #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
 	struct xe_oa_config_bo *oa_bo;
-	int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
+	struct xe_oa_fence *ofence;
+	int i, err, num_signal = 0;
 	struct dma_fence *fence;
-	long timeout;
 
-	/* Emit OA configuration batch */
+	ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
+	if (!ofence) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
 	oa_bo = xe_oa_alloc_config_buffer(stream, config);
 	if (IS_ERR(oa_bo)) {
 		err = PTR_ERR(oa_bo);
 		goto exit;
 	}
 
+	/* Emit OA configuration batch */
 	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
 	if (IS_ERR(fence)) {
 		err = PTR_ERR(fence);
 		goto exit;
 	}
 
-	/* Wait till all previous batches have executed */
-	timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
-	dma_fence_put(fence);
-	if (timeout < 0)
-		err = timeout;
-	else if (!timeout)
-		err = -ETIME;
-	if (err)
-		drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
+	/* Point of no return: initialize and set fence to signal */
+	spin_lock_init(&ofence->lock);
+	dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
 
-	/* Additional empirical delay needed for NOA programming after registers are written */
-	usleep_range(us, 2 * us);
+	for (i = 0; i < stream->num_syncs; i++) {
+		if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
+			num_signal++;
+		xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
+	}
+
+	/* Additional dma_fence_get in case we dma_fence_wait */
+	if (!num_signal)
+		dma_fence_get(&ofence->base);
+
+	/* Update last fence too before adding callback */
+	xe_oa_update_last_fence(stream, fence);
+
+	/* Add job fence callback to schedule work to signal ofence->base */
+	err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
+	xe_gt_assert(stream->gt, !err || err == -ENOENT);
+	if (err == -ENOENT)
+		xe_oa_config_cb(fence, &ofence->cb);
+
+	/* If nothing needs to be signaled we wait synchronously */
+	if (!num_signal) {
+		dma_fence_wait(&ofence->base, false);
+		dma_fence_put(&ofence->base);
+	}
+
+	/* Done with syncs */
+	for (i = 0; i < stream->num_syncs; i++)
+		xe_sync_entry_cleanup(&stream->syncs[i]);
+	kfree(stream->syncs);
+
+	return 0;
 exit:
+	kfree(ofence);
 	return err;
 }
 
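For reference, here is a minimal, self-contained sketch of the fence-chaining pattern the patch introduces: a driver-private dma_fence whose signaling is deferred to delayed work, queued from a callback on the job fence of the submitted batch. This is illustrative only and not part of the patch; all demo_* identifiers are invented for the example, and the 500 us delay simply mirrors NOA_PROGRAM_ADDITIONAL_DELAY_US above.

// SPDX-License-Identifier: MIT
/* Illustrative sketch only; demo_* names are not part of the xe driver. */
#include <linux/dma-fence.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/err.h>

struct demo_fence {
	struct dma_fence base;		/* fence handed out to consumers */
	spinlock_t lock;		/* lock backing @base */
	struct delayed_work work;	/* signals @base after a delay */
	struct dma_fence_cb cb;		/* callback on the upstream job fence */
};

static const char *demo_driver_name(struct dma_fence *f)
{
	return "demo";
}

static const char *demo_timeline_name(struct dma_fence *f)
{
	return "unbound";
}

static const struct dma_fence_ops demo_fence_ops = {
	.get_driver_name = demo_driver_name,
	.get_timeline_name = demo_timeline_name,
};

static void demo_work_fn(struct work_struct *w)
{
	struct demo_fence *df = container_of(w, typeof(*df), work.work);

	/* Delay has elapsed: signal consumers and drop the init reference */
	dma_fence_signal(&df->base);
	dma_fence_put(&df->base);
}

static void demo_cb(struct dma_fence *job_fence, struct dma_fence_cb *cb)
{
	struct demo_fence *df = container_of(cb, typeof(*df), cb);

	/* Upstream fence signaled: wait a bit longer, then signal df->base */
	INIT_DELAYED_WORK(&df->work, demo_work_fn);
	queue_delayed_work(system_unbound_wq, &df->work, usecs_to_jiffies(500));
	dma_fence_put(job_fence);
}

/* Chain a new demo fence after @job_fence; consumes the @job_fence reference */
static struct dma_fence *demo_fence_create(struct dma_fence *job_fence)
{
	struct demo_fence *df = kzalloc(sizeof(*df), GFP_KERNEL);

	if (!df)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&df->lock);
	dma_fence_init(&df->base, &demo_fence_ops, &df->lock, 0, 0);

	/* One reference for the caller; the init reference is dropped in demo_work_fn() */
	dma_fence_get(&df->base);

	/* -ENOENT means @job_fence already signaled; run the callback inline */
	if (dma_fence_add_callback(job_fence, &df->cb, demo_cb) == -ENOENT)
		demo_cb(job_fence, &df->cb);

	return &df->base;
}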