
Commit 7384701

[AINode] Support dynamic adjustment for pool size (apache#16079)
1 parent 9cd5fa4 commit 7384701

File tree

10 files changed: +367 −44 lines

iotdb-core/ainode/ainode/core/ai_node.py

Lines changed: 2 additions & 0 deletions
@@ -162,4 +162,6 @@ def stop(self):
         if self._rpc_service:
             self._rpc_service.stop()
             self._rpc_service.join(1)
+            if self._rpc_service.is_alive():
+                logger.warning("RPC service thread failed to stop in time.")
         logger.info("IoTDB-AINode has successfully stopped.")

iotdb-core/ainode/ainode/core/config.py

Lines changed: 51 additions & 1 deletion
@@ -31,7 +31,10 @@
     AINODE_CONF_GIT_FILE_NAME,
     AINODE_CONF_POM_FILE_NAME,
     AINODE_INFERENCE_BATCH_INTERVAL_IN_MS,
+    AINODE_INFERENCE_EXTRA_MEMORY_RATIO,
     AINODE_INFERENCE_MAX_PREDICT_LENGTH,
+    AINODE_INFERENCE_MEMORY_USAGE_RATIO,
+    AINODE_INFERENCE_MODEL_MEM_USAGE_MAP,
     AINODE_LOG_DIR,
     AINODE_MODELS_DIR,
     AINODE_ROOT_CONF_DIRECTORY_NAME,
@@ -76,7 +79,15 @@ def __init__(self):
         self._ain_inference_max_predict_length: int = (
             AINODE_INFERENCE_MAX_PREDICT_LENGTH
         )
-
+        self._ain_inference_model_mem_usage_map: dict[str, int] = (
+            AINODE_INFERENCE_MODEL_MEM_USAGE_MAP
+        )
+        self._ain_inference_memory_usage_ratio: float = (
+            AINODE_INFERENCE_MEMORY_USAGE_RATIO
+        )
+        self._ain_inference_extra_memory_ratio: float = (
+            AINODE_INFERENCE_EXTRA_MEMORY_RATIO
+        )
         # log directory
         self._ain_logs_dir: str = AINODE_LOG_DIR

@@ -152,6 +163,30 @@ def set_ain_inference_max_predict_length(
     ) -> None:
         self._ain_inference_max_predict_length = ain_inference_max_predict_length

+    def get_ain_inference_model_mem_usage_map(self) -> dict[str, int]:
+        return self._ain_inference_model_mem_usage_map
+
+    def set_ain_inference_model_mem_usage_map(
+        self, ain_inference_model_mem_usage_map: dict[str, int]
+    ) -> None:
+        self._ain_inference_model_mem_usage_map = ain_inference_model_mem_usage_map
+
+    def get_ain_inference_memory_usage_ratio(self) -> float:
+        return self._ain_inference_memory_usage_ratio
+
+    def set_ain_inference_memory_usage_ratio(
+        self, ain_inference_memory_usage_ratio: float
+    ) -> None:
+        self._ain_inference_memory_usage_ratio = ain_inference_memory_usage_ratio
+
+    def get_ain_inference_extra_memory_ratio(self) -> float:
+        return self._ain_inference_extra_memory_ratio
+
+    def set_ain_inference_extra_memory_ratio(
+        self, ain_inference_extra_memory_ratio: float
+    ) -> None:
+        self._ain_inference_extra_memory_ratio = ain_inference_extra_memory_ratio
+
     def get_ain_logs_dir(self) -> str:
         return self._ain_logs_dir

@@ -294,6 +329,21 @@ def _load_config_from_file(self) -> None:
                 int(file_configs["ain_inference_batch_interval_in_ms"])
             )

+        if "ain_inference_model_mem_usage_map" in config_keys:
+            self._config.set_ain_inference_model_mem_usage_map(
+                eval(file_configs["ain_inference_model_mem_usage_map"])
+            )
+
+        if "ain_inference_memory_usage_ratio" in config_keys:
+            self._config.set_ain_inference_memory_usage_ratio(
+                float(file_configs["ain_inference_memory_usage_ratio"])
+            )
+
+        if "ain_inference_extra_memory_ratio" in config_keys:
+            self._config.set_ain_inference_extra_memory_ratio(
+                float(file_configs["ain_inference_extra_memory_ratio"])
+            )
+
        if "ain_models_dir" in config_keys:
            self._config.set_ain_models_dir(file_configs["ain_models_dir"])

iotdb-core/ainode/ainode/core/constant.py

Lines changed: 9 additions & 0 deletions
@@ -21,6 +21,7 @@
 from enum import Enum
 from typing import List

+from ainode.core.model.model_info import BuiltInModelType
 from ainode.thrift.common.ttypes import TEndPoint

 AINODE_VERSION_INFO = "UNKNOWN"
@@ -51,6 +52,14 @@
 # AINode inference configuration
 AINODE_INFERENCE_BATCH_INTERVAL_IN_MS = 15
 AINODE_INFERENCE_MAX_PREDICT_LENGTH = 2880
+AINODE_INFERENCE_MODEL_MEM_USAGE_MAP = {
+    BuiltInModelType.SUNDIAL.value: 1036 * 1024**2,  # 1036 MiB
+    BuiltInModelType.TIMER_XL.value: 856 * 1024**2,  # 856 MiB
+}  # the memory usage of each model in bytes
+AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.4  # the device space allocated for inference
+AINODE_INFERENCE_EXTRA_MEMORY_RATIO = (
+    1.2  # the overhead ratio for inference, used to estimate the pool size
+)

 # AINode folder structure
 AINODE_ROOT_DIR = os.path.dirname(
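
These three constants are the sizing inputs named in the commit title: a measured per-model footprint, the fraction of device memory reserved for inference, and a 1.2x overhead margin. The pool-manager code that combines them is in one of the files not shown in this excerpt, but their comments suggest arithmetic roughly like the following sketch (the helper is hypothetical, and the CPU fallback budget is an assumption):

import torch

from ainode.core.constant import (
    AINODE_INFERENCE_EXTRA_MEMORY_RATIO,
    AINODE_INFERENCE_MEMORY_USAGE_RATIO,
    AINODE_INFERENCE_MODEL_MEM_USAGE_MAP,
)

def estimate_pool_size(model_id: str) -> int:
    """Hypothetical helper: how many inference pools fit on the device."""
    if torch.cuda.is_available():
        total = torch.cuda.get_device_properties(0).total_memory
    else:
        total = 16 * 1024**3  # assumed fallback budget for CPU-only hosts

    # Share of device memory reserved for inference.
    budget = total * AINODE_INFERENCE_MEMORY_USAGE_RATIO
    # Per-pool footprint: measured model memory plus the 20% overhead margin.
    per_pool = (
        AINODE_INFERENCE_MODEL_MEM_USAGE_MAP[model_id]  # keyed by BuiltInModelType value
        * AINODE_INFERENCE_EXTRA_MEMORY_RATIO
    )
    return max(1, int(budget // per_pool))

Under these numbers, a 24 GiB GPU would yield int((24 * 1024**3 * 0.4) // (1036 * 1024**2 * 1.2)) = 7 Sundial pools.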

iotdb-core/ainode/ainode/core/inference/inference_request_pool.py

Lines changed: 25 additions & 14 deletions
@@ -50,6 +50,7 @@ def __init__(
         config: PretrainedConfig,
         request_queue: mp.Queue,
         result_queue: mp.Queue,
+        ready_event,
         **pool_kwargs,
     ):
         super().__init__()
@@ -59,11 +60,8 @@ def __init__(
         self.pool_kwargs = pool_kwargs
         self.model = None
         self._model_manager = None
-        # TODO: Assign device immediately when the pool is created
         self.device = None
-        self.logger = Logger(
-            INFERENCE_LOG_FILE_NAME_PREFIX_TEMPLATE.format(self.device)
-        )
+        self.ready_event = ready_event

         self._threads = []
         self._waiting_queue = request_queue  # Requests that are waiting to be processed
@@ -128,15 +126,25 @@ def _step(self):
         requests = self._scheduler.schedule_step()
         # TODO: We need a batcher to accelerate the concurrent inference
         for request in requests:
-            request.inputs = request.inputs.to(self.device)
-            output = self.model.generate(
-                request.inputs,
-                max_new_tokens=request.max_new_tokens,
-                num_samples=10,
-                revin=True,
-            )
-            request.output_tensor = request.output_tensor.to(self.device)
-            request.write_step_output(output[0].mean(dim=0))
+            if self.model_id == "sundial":
+                request.inputs = request.inputs.to(self.device)
+                output = self.model.generate(
+                    request.inputs,
+                    max_new_tokens=request.max_new_tokens,
+                    num_samples=10,
+                    revin=True,
+                )
+                request.output_tensor = request.output_tensor.to(self.device)
+                request.write_step_output(output[0].mean(dim=0))
+            elif self.model_id == "timer_xl":
+                request.inputs = request.inputs.to(self.device)
+                output = self.model.generate(
+                    request.inputs,
+                    max_new_tokens=request.max_new_tokens,
+                    revin=True,
+                )
+                request.output_tensor = request.output_tensor.to(self.device)
+                request.write_step_output(output[0])
             request.inference_pipeline.post_decode()
             if request.is_finished():
                 request.inference_pipeline.post_inference()
@@ -160,8 +168,12 @@ def _requests_execute_loop(self):
     def run(self):
         self._model_manager = ModelManager()
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.logger = Logger(
+            INFERENCE_LOG_FILE_NAME_PREFIX_TEMPLATE.format(self.device)
+        )
         self._scheduler.device = self.device
         self.model = self._model_manager.load_model(self.model_id, {}).to(self.device)
+        self.ready_event.set()

         # self._warm_up_and_estimate_memory()
@@ -183,4 +195,3 @@ def run(self):

     def stop(self):
         self._stop_event.set()
-        self.logger.info(f"[Inference][Pool-{self.pool_id}] stop() called")
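
The new ready_event closes a startup race: run() executes in a child process, so the pool is only usable once load_model(...) has finished there. A hedged caller-side sketch of the handshake — the pool-manager code that actually does this is in a file not shown here, and the keyword names before config are assumptions, since the diff only shows the tail of the constructor signature:

import multiprocessing as mp

from transformers import PretrainedConfig

from ainode.core.inference.inference_request_pool import InferenceRequestPool

ctx = mp.get_context("spawn")
request_queue, result_queue = ctx.Queue(), ctx.Queue()
ready_event = ctx.Event()

pool = InferenceRequestPool(
    pool_id=0,                 # assumed keyword; self.pool_id exists per the diff
    model_id="timer_xl",       # assumed keyword; self.model_id exists per the diff
    config=PretrainedConfig(), # placeholder config for illustration
    request_queue=request_queue,
    result_queue=result_queue,
    ready_event=ready_event,
)
pool.start()  # the class appears to subclass mp.Process, judging by run()/super().__init__()

# run() sets the event only after the model is loaded onto its device, so
# waiting here guarantees the pool can serve requests; the timeout guards
# against a child that dies during startup.
if not ready_event.wait(timeout=60):
    raise RuntimeError("inference pool did not become ready in time")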

iotdb-core/ainode/ainode/core/inference/scheduler/basic_scheduler.py

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ def memory_is_available(self):
         )
         logger.debug(
             f"[Inference][Device-{self.device}][Pool-{self.pool_id}] "
-            f"Memory used: {used} bytes, Max memory: {self.max_memory_bytes} bytes"
+            f"Memory used: {used/1024**2:.2f} MB, Max memory: {self.max_memory_bytes/1024**2:.2f} MB"
         )
         return used < self.max_memory_bytes
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import torch
+
+from ainode.core.exception import InferenceModelInternalError
+from ainode.core.inference.strategy.abstract_inference_pipeline import (
+    AbstractInferencePipeline,
+)
+from ainode.core.model.timerxl.configuration_timer import TimerConfig
+
+
+class TimerXLInferencePipeline(AbstractInferencePipeline):
+    """
+    Strategy for Timer-XL model inference.
+    """
+
+    def __init__(self, model_config: TimerConfig, **infer_kwargs):
+        super().__init__(model_config, infer_kwargs=infer_kwargs)
+
+    def preprocess_inputs(self, inputs: torch.Tensor):
+        super().preprocess_inputs(inputs)
+        if len(inputs.shape) != 2:
+            raise InferenceModelInternalError(
+                f"[Inference] Input shape must be: [batch_size, seq_len], but receives {inputs.shape}"
+            )
+        # TODO: Disassemble and adapt with TimerXL's ts_generation_mixin.py
+        return inputs
+
+    def post_decode(self):
+        # TODO: Disassemble and adapt with TimerXL's ts_generation_mixin.py
+        pass
+
+    def post_inference(self):
+        # TODO: Disassemble and adapt with TimerXL's ts_generation_mixin.py
+        pass
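
This new file gives Timer-XL the same AbstractInferencePipeline strategy interface the Sundial model already uses, matching the sundial / timer_xl branch added to _step() above. How pipelines are selected per model is not visible in this excerpt; a hedged sketch of one way to do it, assumed to live alongside the pipeline classes so no cross-module import of them is needed:

from ainode.core.exception import InferenceModelInternalError

# Hypothetical registry keyed by the same model ids _step() branches on;
# extending the dict would be the only change when a new built-in model lands.
_PIPELINES = {
    "timer_xl": TimerXLInferencePipeline,
    # "sundial": SundialInferencePipeline,  # assumed existing counterpart class
}

def build_inference_pipeline(model_id, model_config, **infer_kwargs):
    if model_id not in _PIPELINES:
        raise InferenceModelInternalError(
            f"[Inference] No pipeline registered for model '{model_id}'"
        )
    return _PIPELINES[model_id](model_config, **infer_kwargs)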
