 import torch
 
 from ainode.core.inference.strategy.abstract_strategy import AbstractStrategy
+from ainode.core.log import Logger
+
+logger = Logger()
 
 
 class InferenceRequestState:
@@ -32,7 +35,7 @@ class InferenceRequestState:
 class InferenceRequest:
     def __init__(
         self,
-        req_id: int,
+        req_id: str,
         inputs: torch.Tensor,
         strategy: AbstractStrategy,
         max_new_tokens: int = 96,
@@ -41,7 +44,7 @@ def __init__(
         if inputs.ndim == 1:
             inputs = inputs.unsqueeze(0)
 
-        self.id = req_id
+        self.req_id = req_id
         self.inputs = inputs
         self.infer_kwargs = infer_kwargs
         self.strategy = strategy
@@ -59,9 +62,6 @@ def __init__(
             self.batch_size, max_new_tokens, device=device
         )  # shape: [self.batch_size, max_new_tokens]
 
-        self._lock = threading.Lock()
-        self._condition = threading.Condition(self._lock)
-
     def mark_running(self):
         self.state = InferenceRequestState.RUNNING
 
@@ -75,34 +75,45 @@ def is_finished(self) -> bool:
         )
 
     def write_step_output(self, step_output: torch.Tensor):
-        with self._lock:
-            if step_output.ndim == 1:
-                step_output = step_output.unsqueeze(0)
+        if step_output.ndim == 1:
+            step_output = step_output.unsqueeze(0)
 
-            batch_size, step_size = step_output.shape
-            end_idx = self.cur_step_idx + step_size
+        batch_size, step_size = step_output.shape
+        end_idx = self.cur_step_idx + step_size
 
-            if end_idx > self.max_new_tokens:
-                self.output_tensor[:, self.cur_step_idx :] = step_output[
-                    :, : self.max_new_tokens - self.cur_step_idx
-                ]
-                self.cur_step_idx = self.max_new_tokens
-            else:
-                self.output_tensor[:, self.cur_step_idx : end_idx] = step_output
-                self.cur_step_idx = end_idx
+        if end_idx > self.max_new_tokens:
+            self.output_tensor[:, self.cur_step_idx :] = step_output[
+                :, : self.max_new_tokens - self.cur_step_idx
+            ]
+            self.cur_step_idx = self.max_new_tokens
+        else:
+            self.output_tensor[:, self.cur_step_idx : end_idx] = step_output
+            self.cur_step_idx = end_idx
 
-            if self.is_finished():
-                self.mark_finished()
+        if self.is_finished():
+            self.mark_finished()
     def get_final_output(self) -> torch.Tensor:
-        with self._lock:
-            return self.output_tensor[:, : self.cur_step_idx]
+        return self.output_tensor[:, : self.cur_step_idx]
+
+
+class InferenceRequestProxy:
+    """
+    Wrap a raw request so that a caller can block until its result is produced in another process.
+    """
+
+    def __init__(self, req_id: str):
+        self.req_id = req_id
+        self.result = None
+        self._lock = threading.Lock()
+        self._condition = threading.Condition(self._lock)
 
-    def notify_completion(self):
+    def set_result(self, result: Any):
         with self._lock:
+            self.result = result
             self._condition.notify_all()
 
     def wait_for_completion(self) -> Any:
         with self._lock:
-            while self.state != InferenceRequestState.FINISHED:
-                self._condition.wait()
+            self._condition.wait_for(lambda: self.result is not None)  # predicate avoids a hang if set_result ran first
+            return self.result
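
To make the buffer arithmetic in write_step_output concrete, here is a self-contained toy run of the same logic; the sizes and values are invented for illustration:

import torch

max_new_tokens = 4
output_tensor = torch.zeros(1, max_new_tokens)
cur_step_idx = 0

# Two decoding steps: the second overflows the buffer and is truncated,
# mirroring the end_idx > max_new_tokens branch in the diff.
for step_output in (torch.tensor([[1.0, 2.0]]), torch.tensor([[3.0, 4.0, 5.0]])):
    end_idx = cur_step_idx + step_output.shape[1]
    if end_idx > max_new_tokens:
        output_tensor[:, cur_step_idx:] = step_output[:, : max_new_tokens - cur_step_idx]
        cur_step_idx = max_new_tokens
    else:
        output_tensor[:, cur_step_idx:end_idx] = step_output
        cur_step_idx = end_idx

print(output_tensor[:, :cur_step_idx])  # tensor([[1., 2., 3., 4.]]) -- the 5.0 was dropped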
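
And a minimal sketch of the producer/consumer handshake that InferenceRequestProxy enables. The stand-in class below mirrors the one added in this diff so the example runs outside the ainode package; the worker-thread wiring is an assumption for illustration, not part of the change:

import threading
from typing import Any

class InferenceRequestProxy:  # stand-in copy of the class added above
    def __init__(self, req_id: str):
        self.req_id = req_id
        self.result = None
        self._lock = threading.Lock()
        self._condition = threading.Condition(self._lock)

    def set_result(self, result: Any):
        with self._lock:
            self.result = result
            self._condition.notify_all()

    def wait_for_completion(self) -> Any:
        with self._lock:
            self._condition.wait_for(lambda: self.result is not None)
            return self.result

proxy = InferenceRequestProxy("req-0")

# Hypothetical inference worker: in the real system this would run in another
# process and push the finished output back through the proxy.
worker = threading.Thread(target=lambda: proxy.set_result("done"))
worker.start()
print(proxy.wait_for_completion())  # blocks until set_result fires -> "done"
worker.join()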