2020from dataclasses import dataclass
2121from pathlib import Path
2222
23- from infscale .common .exceptions import InsufficientResources
23+ from infscale .common .exceptions import InsufficientResources , InsufficientThroughput
2424from infscale .configs .job import JobConfig
2525from infscale .configs .plan import ExecPlan
2626from infscale .controller .agent_context import AgentContext
@@ -89,6 +89,14 @@ class PipelineData:
8989 total_throughput : float
9090
9191
@dataclass
class DemandData:
    """Demand information passed to ``Planner.build_config``.

    Attributes:
        rate: Demand rate forwarded to the placement / scale-in logic.
            Defaults to ``0.0``.
        scale_out: Selects the scaling direction. When true, the planner
            builds a scaled-out config; otherwise it builds a scaled-in
            config. Defaults to ``True``.
    """

    rate: float = 0.0
    scale_out: bool = True
98+
99+
92100class Planner :
93101 """Planner class."""
94102
@@ -106,21 +114,35 @@ def build_config(
106114 self ,
107115 source : JobConfig ,
108116 agent_ctxts : dict [str , AgentContext ],
109- demand : float = 0 ,
117+ demand_data : DemandData ,
110118 base_cfg : JobConfig = None ,
111119 ) -> JobConfig :
112120 """Build a config based on source config."""
113121 if not self ._autoscale :
114122 # if autoscale is not enabled, we use source as is
115123 return source
116124
125+ rate , scale_out = demand_data .rate , demand_data .scale_out
126+
127+ if scale_out :
128+ return self ._get_scaled_out_cfg (source , agent_ctxts , rate , base_cfg )
129+
130+ return self ._get_scaled_in_cfg (base_cfg , rate )
131+
132+ def _get_scaled_out_cfg (
133+ self ,
134+ source : JobConfig ,
135+ agent_ctxts : dict [str , AgentContext ],
136+ rate : float ,
137+ base_cfg : JobConfig = None ,
138+ ) -> JobConfig :
117139 # if base_cfg is none, this is the first time we build a config,
118140 # so we need to place the dispatcher on a GPU
119141 # otherwise, we already have a base config, so we don't need to
120142 # spare a GPU for the dispatcher
121143 dispatcher_on_gpu = base_cfg is None
122144 solution = self ._calculate_placement (
123- source , agent_ctxts , demand , dispatcher_on_gpu = dispatcher_on_gpu
145+ source , agent_ctxts , rate , dispatcher_on_gpu = dispatcher_on_gpu
124146 )
125147
126148 if solution is None :
@@ -146,6 +168,29 @@ def build_config(
146168 # gen = CfgGen(agent_ctxts, source, plan_list, "cuda", base_cfg)
147169 # return gen.generate()
148170
def _get_scaled_in_cfg(self, cfg: JobConfig, rate: float) -> JobConfig:
    """Build a config with the last recorded pipeline of the job removed.

    The last entry in ``self.pipeline_data[cfg.job_id]`` is the scale-in
    candidate. It is removed only if the combined throughput of the
    remaining pipelines is strictly greater than ``rate``.

    Args:
        cfg: Current job config; its ``job_id`` selects the pipeline records.
        rate: Demand rate the remaining pipelines must still exceed.

    Returns:
        The config returned by ``JobConfig.remove_pipeline`` for the
        candidate pipeline's workers.

    Raises:
        InsufficientThroughput: If no pipeline is recorded for the job, or
            if the remaining throughput would not exceed ``rate``.
    """
    # single lookup; the same list object is mutated at the end
    pipelines = self.pipeline_data[cfg.job_id]
    if not pipelines:
        # nothing to remove: report it as a scale-in refusal rather than
        # letting an IndexError escape from pipelines[-1]
        raise InsufficientThroughput("No pipeline available for scale in")

    last_pipeline = pipelines[-1]

    # compute remaining capacity if we remove the last pipeline
    total_thrpt = sum(data.total_throughput for data in pipelines)
    remaining_throughput = total_thrpt - last_pipeline.total_throughput

    # strict comparison against the current rate; no extra safety margin
    # is applied here
    can_handle_load = remaining_throughput > rate

    if not can_handle_load:
        raise InsufficientThroughput("Not enough remaining throughput for scale in")

    # build the new config first so that a failure in remove_pipeline
    # leaves the pipeline bookkeeping untouched
    scaled_in_cfg = JobConfig.remove_pipeline(cfg, last_pipeline.worker_ids)
    pipelines.pop()

    return scaled_in_cfg
193+
149194 def _set_pipeline_data (self , cfg : JobConfig , total_throughput ) -> None :
150195 """Set pipeline data."""
151196 job_id = cfg .job_id
0 commit comments