@@ -17,8 +17,11 @@ limitations under the License.
1717package routingalgorithms
1818
1919import (
20+ "fmt"
2021 "math"
2122 "math/rand"
23+ "strconv"
24+ "strings"
2225
2326 "github.com/vllm-project/aibrix/pkg/cache"
2427 "github.com/vllm-project/aibrix/pkg/metrics"
@@ -45,14 +48,19 @@ func NewLeastRequestRouter() (types.Router, error) {
4548 return nil , err
4649 }
4750
48- return leastRequestRouter {
51+ return & leastRequestRouter {
4952 cache : c ,
5053 }, nil
5154}
5255
5356// Route request based on least active request among input ready pods
54- func (r leastRequestRouter ) Route (ctx * types.RoutingContext , readyPodList types.PodList ) (string , error ) {
57+ func (r * leastRequestRouter ) Route (ctx * types.RoutingContext , readyPodList types.PodList ) (string , error ) {
5558 readyPods := readyPodList .All ()
59+ // Use distributed DP-level API server routing when pods have multiple ports
60+ if isMultiPortPods (readyPods ) {
61+ return r .apiServerRoute (ctx , readyPods , readyPodList .ListPortsForPod ())
62+ }
63+ // Use default Pod-level routing
5664 targetPod := selectTargetPodWithLeastRequestCount (r .cache , readyPods )
5765
5866 // Use fallback if no valid metrics
@@ -68,6 +76,20 @@ func (r leastRequestRouter) Route(ctx *types.RoutingContext, readyPodList types.
6876 return ctx .TargetAddress (), nil
6977}
7078
79+ func (r * leastRequestRouter ) apiServerRoute (ctx * types.RoutingContext , readyPods []* v1.Pod , portsMap map [string ][]int ) (string , error ) {
80+ targetPod , targetPort := selectTargetPodAndPortWithLeastRequestCount (r .cache , readyPods , portsMap )
81+ if targetPod == nil {
82+ return "" , fmt .Errorf ("no target pod selected" )
83+ }
84+
85+ if targetPort == 0 {
86+ return "" , fmt .Errorf ("target pod does not have a port" )
87+ }
88+ ctx .SetTargetPod (targetPod )
89+ ctx .SetTargetPort (targetPort )
90+ return ctx .TargetAddress (), nil
91+ }
92+
7193func (r * leastRequestRouter ) SubscribedMetrics () []string {
7294 return []string {
7395 metrics .RealtimeNumRequestsRunning ,
@@ -95,13 +117,67 @@ func selectTargetPodWithLeastRequestCount(cache cache.Cache, readyPods []*v1.Pod
95117 return targetPod
96118}
97119
120+ func selectTargetPodAndPortWithLeastRequestCount (cache cache.Cache , readyPods []* v1.Pod , portsMap map [string ][]int ) (* v1.Pod , int ) {
121+ readyPodsMap := make (map [string ]* v1.Pod , len (readyPods ))
122+ for _ , pod := range readyPods {
123+ readyPodsMap [pod .Name ] = pod
124+ }
125+
126+ minCount := math .MaxInt32
127+
128+ var targetApiServers []string
129+ podRequestCount := getRequestCountsWithPort (cache , readyPods , portsMap )
130+ if len (podRequestCount ) == 0 {
131+ return nil , 0
132+ }
133+
134+ klog .V (4 ).InfoS ("selectTargetPodAndPortWithLeastRequestCount" , "podRequestCount" , podRequestCount )
135+ for servername , totalReq := range podRequestCount {
136+ if totalReq < minCount {
137+ minCount = totalReq
138+ targetApiServers = []string {servername }
139+ } else if totalReq == minCount {
140+ targetApiServers = append (targetApiServers , servername )
141+ }
142+ }
143+
144+ if len (targetApiServers ) == 0 {
145+ return nil , 0
146+ }
147+
148+ // Random selection among candidates
149+ selectedServer := targetApiServers [rand .Intn (len (targetApiServers ))]
150+ parts := strings .Split (selectedServer , "/" )
151+ if len (parts ) != 2 {
152+ klog .ErrorS (nil , "Invalid server name format" , "serverName" , selectedServer )
153+ return nil , 0
154+ }
155+
156+ podName := parts [0 ]
157+ portStr := parts [1 ]
158+
159+ targetPod , found := readyPodsMap [podName ]
160+ if ! found {
161+ klog .ErrorS (nil , "Selected pod not found in ready pods list" , "podName" , podName )
162+ return nil , 0
163+ }
164+
165+ targetPort , err := strconv .Atoi (portStr )
166+ if err != nil {
167+ klog .ErrorS (err , "Failed to parse port" , "port" , portStr )
168+ return targetPod , 0
169+ }
170+
171+ return targetPod , targetPort
172+ }
173+
98174// getRequestCounts returns running request count for each pod tracked by gateway.
99175// Note: Currently, gateway instance tracks active running request counts for each pod locally,
100176// if multiple gateway instances are active then state is not shared across them.
101177// It is advised to run on leader gateway instance.
102178// TODO: Support stateful information sync across gateway instances: https://github.com/vllm-project/aibrix/issues/761
103179func getRequestCounts (cache cache.Cache , readyPods []* v1.Pod ) map [string ]int {
104- podRequestCount := map [string ]int {}
180+ podRequestCount := make ( map [string ]int , len ( readyPods ))
105181 for _ , pod := range readyPods {
106182 runningReq , err := cache .GetMetricValueByPod (pod .Name , pod .Namespace , metrics .RealtimeNumRequestsRunning )
107183 if err != nil {
@@ -112,3 +188,45 @@ func getRequestCounts(cache cache.Cache, readyPods []*v1.Pod) map[string]int {
112188
113189 return podRequestCount
114190}
191+
192+ // getRequestCountsWithPort returns running request count for each pod with port tracked by gateway
193+ func getRequestCountsWithPort (cache cache.Cache , readyPods []* v1.Pod , portsMap map [string ][]int ) map [string ]int {
194+ podRequestCount := make (map [string ]int )
195+ for _ , pod := range readyPods {
196+ podPorts , exists := portsMap [pod .Name ]
197+ if ! exists || len (podPorts ) == 0 {
198+ continue
199+ }
200+
201+ for _ , port := range podPorts {
202+ var metricName string
203+ var keyName string
204+
205+ if len (podPorts ) == 1 {
206+ metricName = metrics .RealtimeNumRequestsRunning
207+ keyName = pod .Name
208+ } else {
209+ metricName = metrics .RealtimeNumRequestsRunning + "/" + strconv .Itoa (port )
210+ keyName = pod .Name + "/" + strconv .Itoa (port )
211+ }
212+
213+ var count int
214+ if val , err := cache .GetMetricValueByPod (pod .Name , pod .Namespace , metricName ); err == nil && val != nil {
215+ count = int (val .GetSimpleValue ())
216+ }
217+ podRequestCount [keyName ] = count
218+ }
219+ }
220+
221+ return podRequestCount
222+ }
223+
224+ func isMultiPortPods (pods []* v1.Pod ) bool {
225+ for _ , pod := range pods {
226+ if utils .IsDataParallelPod (pod ) {
227+ return true
228+ }
229+ }
230+
231+ return false
232+ }
0 commit comments