
Commit e271df7

[AINode] Decoupling inference manager into request_manager, pool_manager (#16131)
1 parent a809390 commit e271df7

8 files changed: +442 −128 lines

iotdb-core/ainode/ainode/core/inference/inference_request.py

Lines changed: 2 additions & 0 deletions
@@ -38,6 +38,7 @@ class InferenceRequest:
     def __init__(
         self,
         req_id: str,
+        model_id: str,
         inputs: torch.Tensor,
         inference_pipeline: AbstractInferencePipeline,
         max_new_tokens: int = 96,
@@ -47,6 +48,7 @@ def __init__(
             inputs = inputs.unsqueeze(0)
 
         self.req_id = req_id
+        self.model_id = model_id
         self.inputs = inputs
         self.infer_kwargs = infer_kwargs
         self.inference_pipeline = inference_pipeline
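With `model_id` carried on the request itself, downstream code such as `PoolController.add_request` (later in this commit) can resolve the right queue from `req.model_id` instead of threading the model ID through every call. A hypothetical construction sketch — the pipeline object and tensor shape are illustrative assumptions, and the constructor has further parameters this hunk does not show:

import torch

# Hypothetical: `pipeline` is some concrete AbstractInferencePipeline instance.
req = InferenceRequest(
    req_id="req-0001",
    model_id="timer_xl",     # new in this commit; used for pool routing
    inputs=torch.randn(96),  # the diff shows __init__ calling inputs.unsqueeze(0),
                             # presumably to add a batch dimension to 1-D inputs
    inference_pipeline=pipeline,
    max_new_tokens=96,
)
assert req.model_id == "timer_xl"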

iotdb-core/ainode/ainode/core/inference/inference_request_pool.py

Lines changed: 7 additions & 0 deletions
@@ -20,6 +20,7 @@
 import random
 import threading
 import time
+from enum import Enum
 
 import numpy as np
 import torch
@@ -33,6 +34,12 @@
 from ainode.core.manager.model_manager import ModelManager
 
 
+class PoolState(Enum):
+    INITIALIZING = "INITIALIZING"
+    RUNNING = "RUNNING"
+    STOPPING = "STOPPING"
+
+
 class InferenceRequestPool(mp.Process):
     """
     The request pool to handle inference for a specific model.
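The new `PoolState` enum drives the lifecycle and routing decisions in the `PoolController` and `PoolScheduler` added below: pools are created in `INITIALIZING`, promoted to `RUNNING` once ready, and only `RUNNING` pools receive requests. A minimal self-contained restatement of that eligibility check:

from enum import Enum

class PoolState(Enum):
    INITIALIZING = "INITIALIZING"  # process started, model still loading
    RUNNING = "RUNNING"            # ready_event fired; eligible for requests
    STOPPING = "STOPPING"          # draining; should receive no new work

def is_dispatchable(state: PoolState) -> bool:
    # Mirrors the check in PoolController._select_pool_by_hash below.
    return state is PoolState.RUNNING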
iotdb-core/ainode/ainode/core/inference/inference_request_pool_group.py

Lines changed: 63 additions & 0 deletions (new file; path recovered from the import in pool_controller.py below)

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
from typing import Dict, Tuple

import torch.multiprocessing as mp

from ainode.core.exception import (
    InferenceModelInternalError,
)
from ainode.core.inference.inference_request_pool import InferenceRequestPool
from ainode.core.log import Logger

logger = Logger()


class PoolGroup:
    """
    A group of inference request pools for a specific model.
    """

    def __init__(self, model_id):
        self.pool_group: Dict[int, Tuple[InferenceRequestPool, mp.Queue]] = {}
        self.model_id = model_id

    def get_pool_group(self) -> Dict[int, Tuple[InferenceRequestPool, mp.Queue]]:
        return self.pool_group

    def add_pool(
        self, pool_id: int, request_pool: InferenceRequestPool, request_queue: mp.Queue
    ):
        self.pool_group[pool_id] = (request_pool, request_queue)

    def get_pool_ids(self) -> list[int]:
        return list(self.pool_group.keys())

    def get_request_pool(self, pool_id) -> InferenceRequestPool:
        if pool_id not in self.pool_group:
            raise InferenceModelInternalError(
                f"Pool ID {pool_id} not found for model {self.model_id}"
            )
        return self.pool_group[pool_id][0]

    def get_request_queue(self, pool_id) -> mp.Queue:
        if pool_id not in self.pool_group:
            raise InferenceModelInternalError(
                f"Pool ID {pool_id} not found for model {self.model_id}"
            )
        return self.pool_group[pool_id][1]
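A hypothetical exercise of the `PoolGroup` API. The pool constructor arguments mirror `PoolScheduler._expand_pools` further down this diff, and `SundialConfig` is one of the two model configs this commit wires in:

import torch.multiprocessing as mp

# Hypothetical: group two pools for the "sundial" model.
group = PoolGroup(model_id="sundial")
for pool_id in range(2):
    queue = mp.Queue()
    pool = InferenceRequestPool(
        pool_id=pool_id,
        model_id="sundial",
        config=SundialConfig(),
        request_queue=queue,
        result_queue=mp.Queue(),
        ready_event=mp.Event(),
    )
    group.add_pool(pool_id, pool, queue)

group.get_pool_ids()       # -> [0, 1]
group.get_request_pool(0)  # the InferenceRequestPool process object
group.get_request_queue(1) # its mp.Queue
# group.get_request_queue(99) would raise InferenceModelInternalError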
iotdb-core/ainode/ainode/core/inference/pool_controller.py

Lines changed: 129 additions & 0 deletions (new file; path recovered from the import in pool_scheduler code below)

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
from collections import defaultdict
from typing import Dict, Optional

import torch
import torch.multiprocessing as mp

from ainode.core.exception import (
    InferenceModelInternalError,
)
from ainode.core.inference.inference_request import InferenceRequest
from ainode.core.inference.inference_request_pool import InferenceRequestPool, PoolState
from ainode.core.inference.inference_request_pool_group import PoolGroup
from ainode.core.log import Logger

logger = Logger()


class PoolController:
    """
    A controller for handling inference request pools.
    It handles the registration of pools, adding and removing requests,
    and managing the state of each pool.
    """

    DEFAULT_DEVICE = torch.device("cpu")
    # DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def __init__(self):
        # structure: {model_id: {pool_id: PoolState}}
        self.pool_states: Dict[str, Dict[int, PoolState]] = defaultdict(dict)
        # structure: {model_id: PoolGroup}
        self._request_pool_map: Dict[str, PoolGroup] = {}

    def dispatch_request(self, model_id, req: InferenceRequest):
        pool_idx = self._select_pool_by_hash(model_id, req.req_id)
        self.add_request(pool_idx, req)
        logger.debug(
            f"[Inference][Device-{self.DEFAULT_DEVICE}][Pool-{pool_idx}][ID-{req.req_id}] Request is queued for inference"
        )

    def _select_pool_by_hash(self, model_id, req_id) -> int:
        pool_ids = self.get_pool_ids(model_id)
        if not pool_ids:
            raise InferenceModelInternalError(
                f"No available pools for model {model_id}"
            )
        start_idx = hash(req_id) % len(pool_ids)
        n = len(pool_ids)
        for i in range(n):
            pool_id = pool_ids[(start_idx + i) % n]
            state = self.get_state(model_id, pool_id)
            if state == PoolState.RUNNING:
                return pool_id
        raise InferenceModelInternalError(
            f"No RUNNING pools available for model {model_id}"
        )

    def register_pool(self, model_id, pool_id, request_pool, request_queue):
        self.set_state(model_id, pool_id, PoolState.RUNNING)
        self.set_request_pool_map(model_id, pool_id, request_pool, request_queue)

    def add_request(self, pool_id, req):
        req_q = self.get_request_queue(req.model_id, pool_id)
        req_q.put(req)

    def remove_request(self, model_id, req_id):
        pass

    def get_pool_ids(self, model_id) -> list[int]:
        return self._request_pool_map[model_id].get_pool_ids()

    def has_request_pools(self, model_id) -> bool:
        return model_id in self._request_pool_map

    def get_request_pool_map(self) -> Dict[str, PoolGroup]:
        return self._request_pool_map

    def get_request_pools_group(self, model_id) -> Optional[PoolGroup]:
        return self._request_pool_map.get(model_id, None)

    def get_request_pool(self, model_id, pool_id) -> InferenceRequestPool:
        return self._request_pool_map[model_id].get_request_pool(pool_id)

    def get_request_queue(self, model_id, pool_id) -> mp.Queue:
        return self._request_pool_map[model_id].get_request_queue(pool_id)

    def set_request_pool_map(self, model_id, pool_id, request_pool, request_queue):
        if model_id not in self._request_pool_map:
            self._request_pool_map[model_id] = PoolGroup(model_id)
        self._request_pool_map[model_id].add_pool(pool_id, request_pool, request_queue)

    def get_state(self, model_id, pool_id) -> PoolState:
        return self.pool_states[model_id][pool_id]

    def set_state(self, model_id, pool_id, state):
        self.pool_states[model_id][pool_id] = state

    def get_load(self, model_id, pool_id) -> int:
        pass

    def shutdown(self):
        for model_id, pool_group in self._request_pool_map.items():
            for pool_id in pool_group.get_pool_ids():
                request_pool = pool_group.get_request_pool(pool_id)
                request_queue = pool_group.get_request_queue(pool_id)
                request_pool.stop()
                while not request_queue.empty():
                    request_queue.get_nowait()
                request_queue.close()
            for pool_id in pool_group.get_pool_ids():
                request_pool = pool_group.get_request_pool(pool_id)
                request_pool.join(timeout=10)
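The routing policy deserves a closer look: `_select_pool_by_hash` hashes `req_id` to a starting index, then probes the pool ring in order until it finds a `RUNNING` pool, so requests spread across pools while `INITIALIZING` or `STOPPING` pools are skipped. A standalone restatement of that loop (illustrative only; the `running` set stands in for the `get_state(...)` lookup):

def select_pool(pool_ids: list[int], running: set[int], req_id: str) -> int:
    # Hash-and-probe selection, as in PoolController._select_pool_by_hash.
    if not pool_ids:
        raise RuntimeError("no pools")
    n = len(pool_ids)
    start = hash(req_id) % n
    for i in range(n):
        pool_id = pool_ids[(start + i) % n]
        if pool_id in running:  # stands in for PoolState.RUNNING check
            return pool_id
    raise RuntimeError("no RUNNING pools")

# Pool 2 still INITIALIZING: it is skipped wherever the hash lands.
assert select_pool([0, 1, 2, 3], running={0, 1, 3}, req_id="req-42") in {0, 1, 3}

One design note: Python salts `str` hashes per process (PYTHONHASHSEED), so this mapping is stable within a single controller process but not across restarts — adequate for spreading load, not for sticky routing.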
Lines changed: 125 additions & 0 deletions (new file defining PoolScheduler; file path not shown in this capture)

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
import threading

import torch
import torch.multiprocessing as mp

from ainode.core.exception import (
    InferenceModelInternalError,
)
from ainode.core.inference.inference_request_pool import InferenceRequestPool, PoolState
from ainode.core.inference.pool_controller import PoolController
from ainode.core.log import Logger
from ainode.core.manager.utils import (
    _estimate_pool_size,
)
from ainode.core.model.sundial.configuration_sundial import SundialConfig
from ainode.core.model.timerxl.configuration_timer import TimerConfig
from ainode.core.util.decorator import synchronized

logger = Logger()


class PoolScheduler:
    """
    A Scheduler to init the request pools.
    It initializes the first pool and starts a background thread to expand pools
    as needed based on the model_id.
    """

    DEFAULT_DEVICE = torch.device("cpu")
    # DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def __init__(self, pool_controller: PoolController, result_queue: mp.Queue):
        self._pool_controller = pool_controller
        self._result_queue = result_queue

    @synchronized(threading.Lock())
    def first_req_init(self, model_id: str):
        if not self._pool_controller.has_request_pools(model_id):
            pool_num = _estimate_pool_size(self.DEFAULT_DEVICE, model_id)
            if pool_num <= 0:
                raise InferenceModelInternalError(
                    f"Not enough memory to run model {model_id}."
                )
            # initialize the first pool
            self._first_pool_init(model_id)
            # start a background thread to expand pools
            expand_thread = threading.Thread(
                target=self._expand_pools,
                args=(model_id, 1, pool_num - 1),
                daemon=True,
            )
            expand_thread.start()

    def _first_pool_init(self, model_id: str):
        if model_id == "sundial":
            config = SundialConfig()
        elif model_id == "timer_xl":
            config = TimerConfig()
        first_queue = mp.Queue()
        ready_event = mp.Event()
        first_pool = InferenceRequestPool(
            pool_id=0,
            model_id=model_id,
            config=config,
            request_queue=first_queue,
            result_queue=self._result_queue,
            ready_event=ready_event,
        )
        first_pool.start()
        self._pool_controller.set_state(model_id, 0, PoolState.INITIALIZING)
        if not ready_event.wait(timeout=30):
            logger.error(
                f"[Inference][Device-{self.DEFAULT_DEVICE}][Pool-0] First pool failed to be ready in time"
            )
        else:
            self._pool_controller.register_pool(model_id, 0, first_pool, first_queue)
            logger.info(
                f"[Inference][Device-{self.DEFAULT_DEVICE}][Pool-0] Initialized inference request pool for model {model_id}"
            )

    def _expand_pools(self, model_id, start_idx, count):
        for idx in range(count):
            queue = mp.Queue()
            pool_id = start_idx + idx
            if model_id == "sundial":
                config = SundialConfig()
            elif model_id == "timer_xl":
                config = TimerConfig()
            pool = InferenceRequestPool(
                pool_id=pool_id,
                model_id=model_id,
                config=config,
                request_queue=queue,
                result_queue=self._result_queue,
                ready_event=mp.Event(),
            )
            pool.start()
            self._pool_controller.set_state(model_id, pool_id, PoolState.INITIALIZING)
            if not pool.ready_event.wait(timeout=30):
                logger.error(
                    f"[Inference][Device-{self.DEFAULT_DEVICE}][Pool-{pool_id}] Pool failed to be ready in time"
                )
                continue
            else:
                self._pool_controller.register_pool(model_id, pool_id, pool, queue)
                logger.info(
                    f"[Inference][Device-{self.DEFAULT_DEVICE}][Pool-{pool.pool_id}] New inference request pool started for model {model_id}"
                )
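Taken together, a hypothetical end-to-end wiring of the new classes — the real call site (the request manager this PR decouples from pool management) is not among the diffs shown here:

import torch.multiprocessing as mp

# Hypothetical driver; in AINode this wiring lives in the manager layer.
controller = PoolController()
result_queue = mp.Queue()
scheduler = PoolScheduler(controller, result_queue)

# Lazily bring up pools for a model on first use; "timer_xl" is one of the
# two model IDs this commit special-cases.
scheduler.first_req_init("timer_xl")

# controller.dispatch_request("timer_xl", req)  # req as sketched earlier

# Stop every pool, drain and close its queue, then join the processes.
controller.shutdown()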
