@@ -15,9 +15,12 @@ import Base: @invokelatest
15
15
16
16
import .. Dagger
17
17
import .. Dagger: Context, Processor, Thunk, WeakThunk, ThunkFuture, DTaskFailedException, Chunk, WeakChunk, OSProc, AnyScope, DefaultScope, LockedObject
18
- import .. Dagger: order, dependents, noffspring, istask, inputs, unwrap_weak_checked, affinity, tochunk, timespan_start, timespan_finish, procs, move, chunktype, processor, get_processors, get_parent, execute!, rmprocs!, task_processor, constrain, cputhreadtime
18
+ import .. Dagger: order, dependents, noffspring, istask, inputs, unwrap_weak_checked, affinity, tochunk, timespan_start, timespan_finish, procs, move, chunktype, processor, get_processors, get_parent, execute!, rmprocs!, task_processor, constrain
19
19
import .. Dagger: @dagdebug , @safe_lock_spin1
20
20
import DataStructures: PriorityQueue, enqueue!, dequeue_pair!, peek
21
+ import ScopedValues: @with
22
+
23
+ import MetricsTracker as MT
21
24
22
25
import .. Dagger
23
26
@@ -1648,6 +1651,12 @@ function do_task(to_proc, task_desc)
1648
1651
end
1649
1652
end
1650
1653
1654
+ # Compute signature
1655
+ @warn " Fix kwargs" maxlog= 1
1656
+ sig = DataType[Tf, map (fetched_args) do x
1657
+ chunktype (x)
1658
+ end ... ]
1659
+
1651
1660
#= FIXME : If MaxUtilization, stop processors and wait
1652
1661
if (est_time_util isa MaxUtilization) && (real_time_util > 0)
1653
1662
# FIXME : Stop processors
@@ -1660,8 +1669,11 @@ function do_task(to_proc, task_desc)
1660
1669
timespan_start (ctx, :compute , (;thunk_id, processor= to_proc), (;f))
1661
1670
res = nothing
1662
1671
1663
- # Start counting time and GC allocations
1664
- threadtime_start = cputhreadtime ()
1672
+ # Setup metrics for time monitoring
1673
+ mspec = MT. MetricsSpec (MT. TimeMetric (), Dagger. SignatureMetric (), Dagger. ProcessorMetric ())
1674
+ local_cache = MT. MetricsCache ()
1675
+
1676
+ # Start counting GC allocations
1665
1677
# FIXME
1666
1678
# gcnum_start = Base.gc_num()
1667
1679
@@ -1677,9 +1689,13 @@ function do_task(to_proc, task_desc)
1677
1689
cancel_token= Dagger. DTASK_CANCEL_TOKEN[],
1678
1690
))
1679
1691
1692
+ # Execute
1680
1693
res = Dagger. with_options (propagated) do
1681
- # Execute
1682
- execute! (to_proc, f, fetched_args... ; fetched_kwargs... )
1694
+ @with Dagger. TASK_SIGNATURE=> sig Dagger. TASK_PROCESSOR=> to_proc begin
1695
+ MT. @with_metrics mspec Dagger :execute! thunk_id MT. SyncInto (local_cache) begin
1696
+ execute! (to_proc, f, fetched_args... ; fetched_kwargs... )
1697
+ end
1698
+ end
1683
1699
end
1684
1700
1685
1701
# Check if result is safe to store
@@ -1705,10 +1721,16 @@ function do_task(to_proc, task_desc)
1705
1721
RemoteException (myid (), CapturedException (ex, bt))
1706
1722
end
1707
1723
1708
- threadtime = cputhreadtime () - threadtime_start
1724
+ lock (MT. GLOBAL_METRICS_CACHE) do global_cache
1725
+ MT. sync_results_into! (global_cache, local_cache)
1726
+ end
1727
+
1709
1728
# FIXME : This is not a realistic measure of max. required memory
1710
1729
# gc_allocd = min(max(UInt64(Base.gc_num().allocd) - UInt64(gcnum_start.allocd), UInt64(0)), UInt64(1024^4))
1711
1730
timespan_finish (ctx, :compute , (;thunk_id, processor= to_proc), (;f, result= result_meta))
1731
+
1732
+ threadtime = MT. cache_lookup (local_cache, Dagger, :execute! , thunk_id, MT. TimeMetric ())
1733
+
1712
1734
lock (TASK_SYNC) do
1713
1735
real_time_util[] -= est_time_util
1714
1736
pop! (TASKS_RUNNING, thunk_id)
@@ -1723,7 +1745,7 @@ function do_task(to_proc, task_desc)
1723
1745
storage_pressure= real_alloc_util,
1724
1746
storage_capacity= storage_cap,
1725
1747
loadavg= ((Sys. loadavg ()... ,) ./ Sys. CPU_THREADS),
1726
- threadtime= threadtime ,
1748
+ threadtime,
1727
1749
# FIXME : Add runtime allocation tracking
1728
1750
gc_allocd= (isa (result_meta, Chunk) ? result_meta. handle. size : 0 ),
1729
1751
transfer_rate= (transfer_size[] > 0 && transfer_time[] > 0 ) ? round (UInt64, transfer_size[] / (transfer_time[] / 10 ^ 9 )) : nothing ,
0 commit comments