@@ -15,9 +15,12 @@ import Base: @invokelatest
1515
1616import .. Dagger
1717import .. Dagger: Context, Processor, Thunk, WeakThunk, ThunkFuture, DTaskFailedException, Chunk, WeakChunk, OSProc, AnyScope, DefaultScope, LockedObject
18- import .. Dagger: order, dependents, noffspring, istask, inputs, unwrap_weak_checked, affinity, tochunk, timespan_start, timespan_finish, procs, move, chunktype, processor, get_processors, get_parent, execute!, rmprocs!, task_processor, constrain, cputhreadtime
18+ import .. Dagger: order, dependents, noffspring, istask, inputs, unwrap_weak_checked, affinity, tochunk, timespan_start, timespan_finish, procs, move, chunktype, processor, get_processors, get_parent, execute!, rmprocs!, task_processor, constrain
1919import .. Dagger: @dagdebug , @safe_lock_spin1
2020import DataStructures: PriorityQueue, enqueue!, dequeue_pair!, peek
21+ import ScopedValues: @with
22+
23+ import MetricsTracker as MT
2124
2225import .. Dagger
2326
@@ -1648,6 +1651,12 @@ function do_task(to_proc, task_desc)
16481651 end
16491652 end
16501653
1654+ # Compute signature: `Tf` followed by the chunktype of each fetched positional argument (kwargs are not included yet)
1655+ @warn " Fix kwargs" maxlog= 1
1656+ sig = DataType[Tf, map (fetched_args) do x
1657+ chunktype (x)
1658+ end ... ]
1659+
16511660 #= FIXME : If MaxUtilization, stop processors and wait
16521661 if (est_time_util isa MaxUtilization) && (real_time_util > 0)
16531662 # FIXME : Stop processors
@@ -1660,8 +1669,11 @@ function do_task(to_proc, task_desc)
16601669 timespan_start (ctx, :compute , (;thunk_id, processor= to_proc), (;f))
16611670 res = nothing
16621671
1663- # Start counting time and GC allocations
1664- threadtime_start = cputhreadtime ()
1672+ # Setup metrics for time monitoring
1673+ mspec = MT. MetricsSpec (MT. TimeMetric (), Dagger. SignatureMetric (), Dagger. ProcessorMetric ())
1674+ local_cache = MT. MetricsCache ()
1675+
1676+ # GC allocation counting is currently disabled (the gc_num() snapshot below is commented out)
16651677 # FIXME
16661678 # gcnum_start = Base.gc_num()
16671679
@@ -1677,9 +1689,13 @@ function do_task(to_proc, task_desc)
16771689 cancel_token= Dagger. DTASK_CANCEL_TOKEN[],
16781690 ))
16791691
1692+ # Execute
16801693 res = Dagger. with_options (propagated) do
1681- # Execute
1682- execute! (to_proc, f, fetched_args... ; fetched_kwargs... )
1694+ @with Dagger. TASK_SIGNATURE=> sig Dagger. TASK_PROCESSOR=> to_proc begin
1695+ MT. @with_metrics mspec Dagger :execute! thunk_id MT. SyncInto (local_cache) begin
1696+ execute! (to_proc, f, fetched_args... ; fetched_kwargs... )
1697+ end
1698+ end
16831699 end
16841700
16851701 # Check if result is safe to store
@@ -1705,10 +1721,16 @@ function do_task(to_proc, task_desc)
17051721 RemoteException (myid (), CapturedException (ex, bt))
17061722 end
17071723
1708- threadtime = cputhreadtime () - threadtime_start
1724+ lock (MT. GLOBAL_METRICS_CACHE) do global_cache
1725+ MT. sync_results_into! (global_cache, local_cache)
1726+ end
1727+
17091728 # FIXME : This is not a realistic measure of max. required memory
17101729 # gc_allocd = min(max(UInt64(Base.gc_num().allocd) - UInt64(gcnum_start.allocd), UInt64(0)), UInt64(1024^4))
17111730 timespan_finish (ctx, :compute , (;thunk_id, processor= to_proc), (;f, result= result_meta))
1731+
1732+ threadtime = MT. cache_lookup (local_cache, Dagger, :execute! , thunk_id, MT. TimeMetric ())
1733+
17121734 lock (TASK_SYNC) do
17131735 real_time_util[] -= est_time_util
17141736 pop! (TASKS_RUNNING, thunk_id)
@@ -1723,7 +1745,7 @@ function do_task(to_proc, task_desc)
17231745 storage_pressure= real_alloc_util,
17241746 storage_capacity= storage_cap,
17251747 loadavg= ((Sys. loadavg ()... ,) ./ Sys. CPU_THREADS),
1726- threadtime= threadtime ,
1748+ threadtime,
17271749 # FIXME : Add runtime allocation tracking
17281750 gc_allocd= (isa (result_meta, Chunk) ? result_meta. handle. size : 0 ),
17291751 transfer_rate= (transfer_size[] > 0 && transfer_time[] > 0 ) ? round (UInt64, transfer_size[] / (transfer_time[] / 10 ^ 9 )) : nothing ,
0 commit comments