@@ -856,6 +856,44 @@ <h1>Source code for dpctl._sycl_timer</h1><div class="highlight"><pre>
856856 < span class ="k "> return</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_dt</ span >
857857
858858
859+ < span class ="k "> class</ span > < span class ="nc "> BaseDeviceTimer</ span > < span class ="p "> :</ span >
860+ < span class ="vm "> __slots__</ span > < span class ="o "> =</ span > < span class ="p "> [</ span > < span class ="s2 "> "queue"</ span > < span class ="p "> ]</ span >
861+
862+ < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> ):</ span >
863+ < span class ="k "> if</ span > < span class ="ow "> not</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> ,</ span > < span class ="n "> SyclQueue</ span > < span class ="p "> ):</ span >
864+ < span class ="k "> raise</ span > < span class ="ne "> TypeError</ span > < span class ="p "> (</ span > < span class ="sa "> f</ span > < span class ="s2 "> "Expected type SyclQueue, got </ span > < span class ="si "> {</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> )</ span > < span class ="si "> }</ span > < span class ="s2 "> "</ span > < span class ="p "> )</ span >
865+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="o "> =</ span > < span class ="n "> sycl_queue</ span >
866+
867+
868+ < span class ="k "> class</ span > < span class ="nc "> QueueBarrierDeviceTimer</ span > < span class ="p "> (</ span > < span class ="n "> BaseDeviceTimer</ span > < span class ="p "> ):</ span >
869+ < span class ="vm "> __slots__</ span > < span class ="o "> =</ span > < span class ="p "> []</ span >
870+
871+ < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> ):</ span >
872+ < span class ="nb "> super</ span > < span class ="p "> (</ span > < span class ="n "> QueueBarrierDeviceTimer</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="p "> )</ span > < span class ="o "> .</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> )</ span >
873+
874+ < span class ="k "> def</ span > < span class ="nf "> get_event</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ):</ span >
875+ < span class ="k "> return</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="o "> .</ span > < span class ="n "> submit_barrier</ span > < span class ="p "> ()</ span >
876+
877+
878+ < span class ="k "> class</ span > < span class ="nc "> OrderManagerDeviceTimer</ span > < span class ="p "> (</ span > < span class ="n "> BaseDeviceTimer</ span > < span class ="p "> ):</ span >
879+ < span class ="vm "> __slots__</ span > < span class ="o "> =</ span > < span class ="p "> [</ span > < span class ="s2 "> "_order_manager"</ span > < span class ="p "> ,</ span > < span class ="s2 "> "_submit_empty_task_fn"</ span > < span class ="p "> ]</ span >
880+
881+ < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> ):</ span >
882+ < span class ="kn "> import</ span > < span class ="nn "> dpctl.utils._seq_order_keeper</ span > < span class ="k "> as</ span > < span class ="nn "> s_ok</ span >
883+ < span class ="kn "> from</ span > < span class ="nn "> dpctl.utils</ span > < span class ="kn "> import</ span > < span class ="n "> SequentialOrderManager</ span > < span class ="k "> as</ span > < span class ="n "> seq_om</ span >
884+
885+ < span class ="nb "> super</ span > < span class ="p "> (</ span > < span class ="n "> OrderManagerDeviceTimer</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="p "> )</ span > < span class ="o "> .</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="n "> sycl_queue</ span > < span class ="p "> )</ span >
886+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _order_manager</ span > < span class ="o "> =</ span > < span class ="n "> seq_om</ span > < span class ="p "> [</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="p "> ]</ span >
887+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _submit_empty_task_fn</ span > < span class ="o "> =</ span > < span class ="n "> s_ok</ span > < span class ="o "> .</ span > < span class ="n "> _submit_empty_task</ span >
888+
889+ < span class ="k "> def</ span > < span class ="nf "> get_event</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ):</ span >
890+ < span class ="n "> ev</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _submit_empty_task_fn</ span > < span class ="p "> (</ span >
891+ < span class ="n "> sycl_queue</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="p "> ,</ span > < span class ="n "> depends</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _order_manager</ span > < span class ="o "> .</ span > < span class ="n "> submitted_events</ span >
892+ < span class ="p "> )</ span >
893+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _order_manager</ span > < span class ="o "> .</ span > < span class ="n "> add_event_pair</ span > < span class ="p "> (</ span > < span class ="n "> ev</ span > < span class ="p "> ,</ span > < span class ="n "> ev</ span > < span class ="p "> )</ span >
894+ < span class ="k "> return</ span > < span class ="n "> ev</ span >
895+
896+
859897< div class ="viewcode-block " id ="SyclTimer "> < a class ="viewcode-back " href ="../../api_reference/dpctl/generated/dpctl.SyclTimer.html#dpctl.SyclTimer "> [docs]</ a > < span class ="k "> class</ span > < span class ="nc "> SyclTimer</ span > < span class ="p "> :</ span >
860898< span class ="w "> </ span > < span class ="sd "> """</ span >
861899< span class ="sd "> Context to measure device time and host wall-time of execution</ span >
@@ -870,7 +908,7 @@ <h1>Source code for dpctl._sycl_timer</h1><div class="highlight"><pre>
870908< span class ="sd "> q = dpctl.SyclQueue(property="enable_profiling")</ span >
871909
872910< span class ="sd "> # create the timer</ span >
873- < span class ="sd "> milliseconds_sc = 1e-3 </ span >
911+ < span class ="sd "> milliseconds_sc = 1e3 </ span >
874912< span class ="sd "> timer = dpctl.SyclTimer(time_scale = milliseconds_sc)</ span >
875913
876914< span class ="sd "> # use the timer</ span >
@@ -885,25 +923,36 @@ <h1>Source code for dpctl._sycl_timer</h1><div class="highlight"><pre>
885923< span class ="sd "> wall_dt, device_dt = timer.dt</ span >
886924
887925< span class ="sd "> .. note::</ span >
888- < span class ="sd "> The timer submits barriers to the queue at the entrance and the</ span >
926+ < span class ="sd "> The timer submits tasks to the queue at the entrance and the</ span >
889927< span class ="sd "> exit of the context and uses profiling information from events</ span >
890928< span class ="sd "> associated with these submissions to perform the timing. Thus</ span >
891929< span class ="sd "> :class:`dpctl.SyclTimer` requires the queue with ``"enable_profiling"``</ span >
892930< span class ="sd "> property. In order to be able to collect the profiling information,</ span >
893931< span class ="sd "> the ``dt`` property ensures that both submitted tasks complete their</ span >
894932< span class ="sd "> execution and thus effectively synchronizes the queue.</ span >
895933
934+ < span class ="sd "> The ``device_timer`` keyword argument controls the type of tasks submitted.</ span >
935+ < span class ="sd "> With ``device_timer="queue_barrier"``, queue barrier tasks are used. With</ span >
936+ < span class ="sd "> ``device_timer="order_manager"``, a single empty-body task is inserted</ span >
937+ < span class ="sd "> instead, relying on the order manager (used by `dpctl.tensor` operations) to</ span >
938+ < span class ="sd "> order these tasks so that they fence operations performed within</ span >
939+ < span class ="sd "> the timer's context.</ span >
940+
896941< span class ="sd "> Args:</ span >
897942< span class ="sd "> host_timer (callable, optional):</ span >
898943< span class ="sd "> A callable such that host_timer() returns current</ span >
899944< span class ="sd "> host time in seconds.</ span >
900945< span class ="sd "> Default: :py:func:`timeit.default_timer`.</ span >
946+ < span class ="sd "> device_timer (Literal["queue_barrier", "order_manager"], optional):</ span >
947+ < span class ="sd "> Device timing method. Default: "queue_barrier".</ span >
901948< span class ="sd "> time_scale (Union[int, float], optional):</ span >
902949< span class ="sd "> Ratio of one second to the unit of time of interest (e.g. ``1e3`` for milliseconds).</ span >
903950< span class ="sd "> Default: ``1``.</ span >
904951< span class ="sd "> """</ span >
905952
906- < div class ="viewcode-block " id ="SyclTimer.__init__ "> < a class ="viewcode-back " href ="../../api_reference/dpctl/generated/generated/dpctl.SyclTimer.__init__.html#dpctl.SyclTimer.__init__ "> [docs]</ a > < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> host_timer</ span > < span class ="o "> =</ span > < span class ="n "> timeit</ span > < span class ="o "> .</ span > < span class ="n "> default_timer</ span > < span class ="p "> ,</ span > < span class ="n "> time_scale</ span > < span class ="o "> =</ span > < span class ="mi "> 1</ span > < span class ="p "> ):</ span >
953+ < div class ="viewcode-block " id ="SyclTimer.__init__ "> < a class ="viewcode-back " href ="../../api_reference/dpctl/generated/generated/dpctl.SyclTimer.__init__.html#dpctl.SyclTimer.__init__ "> [docs]</ a > < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span >
954+ < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> host_timer</ span > < span class ="o "> =</ span > < span class ="n "> timeit</ span > < span class ="o "> .</ span > < span class ="n "> default_timer</ span > < span class ="p "> ,</ span > < span class ="n "> device_timer</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ,</ span > < span class ="n "> time_scale</ span > < span class ="o "> =</ span > < span class ="mi "> 1</ span >
955+ < span class ="p "> ):</ span >
907956< span class ="w "> </ span > < span class ="sd "> """</ span >
908957< span class ="sd "> Create new instance of :class:`.SyclTimer`.</ span >
909958
@@ -912,6 +961,8 @@ <h1>Source code for dpctl._sycl_timer</h1><div class="highlight"><pre>
912961< span class ="sd "> A function that takes no arguments and returns a value</ span >
913962< span class ="sd "> measuring time.</ span >
914963< span class ="sd "> Default: :meth:`timeit.default_timer`.</ span >
964+ < span class ="sd "> device_timer (Literal["queue_barrier", "order_manager"], optional):</ span >
965+ < span class ="sd "> Device timing method. Default: "queue_barrier".</ span >
915966< span class ="sd "> time_scale (Union[int, float], optional):</ span >
916967< span class ="sd "> Scaling factor applied to durations measured by</ span >
917968< span class ="sd "> the host_timer. Default: ``1``.</ span >
@@ -920,12 +971,27 @@ <h1>Source code for dpctl._sycl_timer</h1><div class="highlight"><pre>
920971 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> time_scale</ span > < span class ="o "> =</ span > < span class ="n "> time_scale</ span >
921972 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
922973 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> host_times</ span > < span class ="o "> =</ span > < span class ="p "> []</ span >
923- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> bracketing_events</ span > < span class ="o "> =</ span > < span class ="p "> []</ span > </ div >
974+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> bracketing_events</ span > < span class ="o "> =</ span > < span class ="p "> []</ span >
975+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _context_data</ span > < span class ="o "> =</ span > < span class ="nb "> list</ span > < span class ="p "> ()</ span >
976+ < span class ="k "> if</ span > < span class ="n "> device_timer</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
977+ < span class ="n "> device_timer</ span > < span class ="o "> =</ span > < span class ="s2 "> "queue_barrier"</ span >
978+ < span class ="k "> if</ span > < span class ="n "> device_timer</ span > < span class ="o "> ==</ span > < span class ="s2 "> "queue_barrier"</ span > < span class ="p "> :</ span >
979+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer_class</ span > < span class ="o "> =</ span > < span class ="n "> QueueBarrierDeviceTimer</ span >
980+ < span class ="k "> elif</ span > < span class ="n "> device_timer</ span > < span class ="o "> ==</ span > < span class ="s2 "> "order_manager"</ span > < span class ="p "> :</ span >
981+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer_class</ span > < span class ="o "> =</ span > < span class ="n "> OrderManagerDeviceTimer</ span >
982+ < span class ="k "> else</ span > < span class ="p "> :</ span >
983+ < span class ="k "> raise</ span > < span class ="ne "> ValueError</ span > < span class ="p "> (</ span >
984+ < span class ="s2 "> "Supported values for device_timer keyword are "</ span >
985+ < span class ="s2 "> "'queue_barrier', 'order_manager', got "</ span >
986+ < span class ="sa "> f</ span > < span class ="s2 "> "'</ span > < span class ="si "> {</ span > < span class ="n "> device_timer</ span > < span class ="si "> }</ span > < span class ="s2 "> '"</ span >
987+ < span class ="p "> )</ span >
988+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > </ div >
924989
925990 < span class ="k "> def</ span > < span class ="fm "> __call__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> queue</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ):</ span >
926991 < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> queue</ span > < span class ="p "> ,</ span > < span class ="n "> SyclQueue</ span > < span class ="p "> ):</ span >
927992 < span class ="k "> if</ span > < span class ="n "> queue</ span > < span class ="o "> .</ span > < span class ="n "> has_enable_profiling</ span > < span class ="p "> :</ span >
928993 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="o "> =</ span > < span class ="n "> queue</ span >
994+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer_class</ span > < span class ="p "> (</ span > < span class ="n "> queue</ span > < span class ="p "> )</ span >
929995 < span class ="k "> else</ span > < span class ="p "> :</ span >
930996 < span class ="k "> raise</ span > < span class ="ne "> ValueError</ span > < span class ="p "> (</ span >
931997 < span class ="s2 "> "The given queue was not created with the "</ span >
@@ -939,17 +1005,17 @@ <h1>Source code for dpctl._sycl_timer</h1><div class="highlight"><pre>
9391005 < span class ="k "> return</ span > < span class ="bp "> self</ span >
9401006
9411007 < span class ="k "> def</ span > < span class ="fm "> __enter__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ):</ span >
942- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _event_start</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="o "> .</ span > < span class ="n "> submit_barrier</ span > < span class ="p "> ()</ span >
943- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _host_start</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> timer</ span > < span class ="p "> ()</ span >
1008+ < span class ="n "> _event_start</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer</ span > < span class ="o "> .</ span > < span class ="n "> get_event</ span > < span class ="p "> ()</ span >
1009+ < span class ="n "> _host_start</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> timer</ span > < span class ="p "> ()</ span >
1010+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _context_data</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> ((</ span > < span class ="n "> _event_start</ span > < span class ="p "> ,</ span > < span class ="n "> _host_start</ span > < span class ="p "> ))</ span >
9441011 < span class ="k "> return</ span > < span class ="bp "> self</ span >
9451012
9461013 < span class ="k "> def</ span > < span class ="fm "> __exit__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="o "> *</ span > < span class ="n "> args</ span > < span class ="p "> ):</ span >
947- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> host_times</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> ((</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _host_start</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> timer</ span > < span class ="p "> ()))</ span >
948- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> bracketing_events</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> (</ span >
949- < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _event_start</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> queue</ span > < span class ="o "> .</ span > < span class ="n "> submit_barrier</ span > < span class ="p "> ())</ span >
950- < span class ="p "> )</ span >
951- < span class ="k "> del</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _event_start</ span >
952- < span class ="k "> del</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _host_start</ span >
1014+ < span class ="n "> _event_end</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _device_timer</ span > < span class ="o "> .</ span > < span class ="n "> get_event</ span > < span class ="p "> ()</ span >
1015+ < span class ="n "> _host_end</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> timer</ span > < span class ="p "> ()</ span >
1016+ < span class ="n "> _event_start</ span > < span class ="p "> ,</ span > < span class ="n "> _host_start</ span > < span class ="o "> =</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _context_data</ span > < span class ="o "> .</ span > < span class ="n "> pop</ span > < span class ="p "> ()</ span >
1017+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> host_times</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> ((</ span > < span class ="n "> _host_start</ span > < span class ="p "> ,</ span > < span class ="n "> _host_end</ span > < span class ="p "> ))</ span >
1018+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> bracketing_events</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> ((</ span > < span class ="n "> _event_start</ span > < span class ="p "> ,</ span > < span class ="n "> _event_end</ span > < span class ="p "> ))</ span >
9531019
9541020 < span class ="nd "> @property</ span >
9551021 < span class ="k "> def</ span > < span class ="nf "> dt</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ):</ span >
0 commit comments