
Commit 381aea8

Fix call inference cannot specify outputLength
1 parent 71daf3b commit 381aea8

File tree: 9 files changed, +44 −42 lines changed

integration-test/src/test/java/org/apache/iotdb/ainode/it/AINodeCallInferenceIT.java

Lines changed: 13 additions & 3 deletions
@@ -34,10 +34,12 @@
 
 import java.sql.Connection;
 import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 import java.sql.Statement;
 
 import static org.apache.iotdb.ainode.utils.AINodeTestUtils.BUILTIN_MODEL_MAP;
+import static org.apache.iotdb.ainode.utils.AINodeTestUtils.checkHeader;
 import static org.apache.iotdb.ainode.utils.AINodeTestUtils.errorTest;
 import static org.apache.iotdb.db.it.utils.TestUtils.prepareData;
 

@@ -55,7 +57,8 @@ public class AINodeCallInferenceIT {
   };
 
   private static final String CALL_INFERENCE_SQL_TEMPLATE =
-      "CALL INFERENCE(%s, \"select s%d from root.AI\")";
+      "CALL INFERENCE(%s, \"SELECT s%d FROM root.AI LIMIT %d\", generateTime=true, outputLength=%d)";
+  private static final int DEFAULT_OUTPUT_LENGTH = 48;
 
   @BeforeClass
   public static void setUp() throws Exception {

@@ -93,14 +96,21 @@ public void callInferenceTest(Statement statement, AINodeTestUtils.FakeModelInfo
     // Invoke call inference for specified models, there should exist result.
     for (int i = 0; i < 4; i++) {
       String callInferenceSQL =
-          String.format(CALL_INFERENCE_SQL_TEMPLATE, modelInfo.getModelId(), i);
+          String.format(
+              CALL_INFERENCE_SQL_TEMPLATE,
+              modelInfo.getModelId(),
+              i,
+              DEFAULT_OUTPUT_LENGTH,
+              DEFAULT_OUTPUT_LENGTH);
       try (ResultSet resultSet = statement.executeQuery(callInferenceSQL)) {
+        ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
+        checkHeader(resultSetMetaData, "Time,output");
         int count = 0;
         while (resultSet.next()) {
           count++;
         }
         // Ensure the call inference return results
-        Assert.assertTrue(count > 0);
+        Assert.assertEquals(DEFAULT_OUTPUT_LENGTH, count);
       }
     }
   }
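Note: with DEFAULT_OUTPUT_LENGTH = 48, the template renders to statements such as CALL INFERENCE(<modelId>, "SELECT s0 FROM root.AI LIMIT 48", generateTime=true, outputLength=48). The test can therefore assert an exact row count of 48 rather than merely a non-empty result, and checkHeader verifies the Time,output header (presumably the Time column contributed by generateTime=true).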

iotdb-core/ainode/iotdb/ainode/core/config.py

Lines changed: 7 additions & 9 deletions
@@ -32,7 +32,7 @@
     AINODE_CONF_POM_FILE_NAME,
     AINODE_INFERENCE_BATCH_INTERVAL_IN_MS,
     AINODE_INFERENCE_EXTRA_MEMORY_RATIO,
-    AINODE_INFERENCE_MAX_PREDICT_LENGTH,
+    AINODE_INFERENCE_MAX_OUTPUT_LENGTH,
     AINODE_INFERENCE_MEMORY_USAGE_RATIO,
     AINODE_INFERENCE_MODEL_MEM_USAGE_MAP,
     AINODE_LOG_DIR,

@@ -75,9 +75,7 @@ def __init__(self):
         self._ain_inference_batch_interval_in_ms: int = (
             AINODE_INFERENCE_BATCH_INTERVAL_IN_MS
         )
-        self._ain_inference_max_predict_length: int = (
-            AINODE_INFERENCE_MAX_PREDICT_LENGTH
-        )
+        self._ain_inference_max_output_length: int = AINODE_INFERENCE_MAX_OUTPUT_LENGTH
         self._ain_inference_model_mem_usage_map: dict[str, int] = (
             AINODE_INFERENCE_MODEL_MEM_USAGE_MAP
         )

@@ -160,13 +158,13 @@ def set_ain_inference_batch_interval_in_ms(
     ) -> None:
         self._ain_inference_batch_interval_in_ms = ain_inference_batch_interval_in_ms
 
-    def get_ain_inference_max_predict_length(self) -> int:
-        return self._ain_inference_max_predict_length
+    def get_ain_inference_max_output_length(self) -> int:
+        return self._ain_inference_max_output_length
 
-    def set_ain_inference_max_predict_length(
-        self, ain_inference_max_predict_length: int
+    def set_ain_inference_max_output_length(
+        self, ain_inference_max_output_length: int
     ) -> None:
-        self._ain_inference_max_predict_length = ain_inference_max_predict_length
+        self._ain_inference_max_output_length = ain_inference_max_output_length
 
     def get_ain_inference_model_mem_usage_map(self) -> dict[str, int]:
         return self._ain_inference_model_mem_usage_map

iotdb-core/ainode/iotdb/ainode/core/constant.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@
 
 # AINode inference configuration
 AINODE_INFERENCE_BATCH_INTERVAL_IN_MS = 15
-AINODE_INFERENCE_MAX_PREDICT_LENGTH = 2880
+AINODE_INFERENCE_MAX_OUTPUT_LENGTH = 2880
 
 # TODO: Should be optimized
 AINODE_INFERENCE_MODEL_MEM_USAGE_MAP = {

iotdb-core/ainode/iotdb/ainode/core/inference/inference_request.py

Lines changed: 8 additions & 8 deletions
@@ -39,7 +39,7 @@ def __init__(
         req_id: str,
         model_id: str,
         inputs: torch.Tensor,
-        max_new_tokens: int = 96,
+        output_length: int = 96,
         **infer_kwargs,
     ):
         if inputs.ndim == 1:

@@ -49,8 +49,8 @@ def __init__(
         self.model_id = model_id
         self.inputs = inputs
         self.infer_kwargs = infer_kwargs
-        self.max_new_tokens = (
-            max_new_tokens  # Number of time series data points to generate
+        self.output_length = (
+            output_length  # Number of time series data points to generate
         )
 
         self.batch_size = inputs.size(0)

@@ -61,7 +61,7 @@ def __init__(
 
         # Preallocate output buffer [batch_size, max_new_tokens]
         self.output_tensor = torch.zeros(
-            self.batch_size, max_new_tokens, device="cpu"
+            self.batch_size, output_length, device="cpu"
         )  # shape: [self.batch_size, max_new_steps]
 
     def mark_running(self):

@@ -73,7 +73,7 @@ def mark_finished(self):
     def is_finished(self) -> bool:
         return (
             self.state == InferenceRequestState.FINISHED
-            or self.cur_step_idx >= self.max_new_tokens
+            or self.cur_step_idx >= self.output_length
         )
 
     def write_step_output(self, step_output: torch.Tensor):

@@ -83,11 +83,11 @@ def write_step_output(self, step_output: torch.Tensor):
         batch_size, step_size = step_output.shape
         end_idx = self.cur_step_idx + step_size
 
-        if end_idx > self.max_new_tokens:
+        if end_idx > self.output_length:
             self.output_tensor[:, self.cur_step_idx :] = step_output[
-                :, : self.max_new_tokens - self.cur_step_idx
+                :, : self.output_length - self.cur_step_idx
             ]
-            self.cur_step_idx = self.max_new_tokens
+            self.cur_step_idx = self.output_length
         else:
             self.output_tensor[:, self.cur_step_idx : end_idx] = step_output
             self.cur_step_idx = end_idx
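
The renamed field drives both the preallocated buffer and the finish check above: output_length caps how many points a request accumulates. A minimal stand-alone sketch of the same truncation rule (the shapes and step size here are illustrative, not the module's defaults):

import torch

# A request with batch_size=1 and output_length=5, fed by a model that
# emits 3 points per step; the second step overflows and is truncated.
output_length = 5
output_tensor = torch.zeros(1, output_length)
cur_step_idx = 0

for step_output in (torch.ones(1, 3), torch.full((1, 3), 2.0)):
    end_idx = cur_step_idx + step_output.shape[1]
    if end_idx > output_length:
        # Keep only the remaining slots of the final step.
        output_tensor[:, cur_step_idx:] = step_output[:, : output_length - cur_step_idx]
        cur_step_idx = output_length
    else:
        output_tensor[:, cur_step_idx:end_idx] = step_output
        cur_step_idx = end_idx

print(output_tensor)                  # tensor([[1., 1., 1., 2., 2.]])
print(cur_step_idx >= output_length)  # True, i.e. the request is finished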

iotdb-core/ainode/iotdb/ainode/core/inference/inference_request_pool.py

Lines changed: 2 additions & 2 deletions
@@ -115,7 +115,7 @@ def _step(self):
 
         grouped_requests = defaultdict(list)
         for req in all_requests:
-            key = (req.inputs.shape[1], req.max_new_tokens)
+            key = (req.inputs.shape[1], req.output_length)
             grouped_requests[key].append(req)
         grouped_requests = list(grouped_requests.values())
 

@@ -124,7 +124,7 @@ def _step(self):
             if isinstance(self._inference_pipeline, ForecastPipeline):
                 batch_output = self._inference_pipeline.forecast(
                     batch_inputs,
-                    predict_length=requests[0].max_new_tokens,
+                    predict_length=requests[0].output_length,
                     revin=True,
                 )
             elif isinstance(self._inference_pipeline, ClassificationPipeline):
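
Batching now keys on output_length as well as input width, so requests are only fused when both match. A small sketch of that grouping step, with a hypothetical Req dataclass standing in for InferenceRequest:

from collections import defaultdict
from dataclasses import dataclass

import torch

@dataclass
class Req:  # hypothetical stand-in for InferenceRequest
    inputs: torch.Tensor
    output_length: int

all_requests = [
    Req(torch.zeros(1, 96), 48),
    Req(torch.zeros(1, 96), 48),   # same width and length: batched with the first
    Req(torch.zeros(1, 96), 96),   # same input width, different output_length
    Req(torch.zeros(1, 192), 48),  # different input width
]

grouped_requests = defaultdict(list)
for req in all_requests:
    key = (req.inputs.shape[1], req.output_length)
    grouped_requests[key].append(req)

print([len(group) for group in grouped_requests.values()])  # [2, 1, 1]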

iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py

Lines changed: 9 additions & 10 deletions
@@ -189,26 +189,26 @@ def _run(
         inputs = torch.tensor(data).unsqueeze(0).float().to("cpu")
 
         inference_attrs = extract_attrs(req)
-        predict_length = int(inference_attrs.pop("predict_length", 96))
+        output_length = int(inference_attrs.pop("output_length", 96))
         if (
-            predict_length
-            > AINodeDescriptor().get_config().get_ain_inference_max_predict_length()
+            output_length
+            > AINodeDescriptor().get_config().get_ain_inference_max_output_length()
         ):
             raise NumericalRangeException(
                 "output_length",
                 1,
                 AINodeDescriptor()
                 .get_config()
-                .get_ain_inference_max_predict_length(),
-                predict_length,
+                .get_ain_inference_max_output_length(),
+                output_length,
             )
 
         if self._pool_controller.has_request_pools(model_id):
             infer_req = InferenceRequest(
                 req_id=generate_req_id(),
                 model_id=model_id,
                 inputs=inputs,
-                max_new_tokens=predict_length,
+                output_length=output_length,
             )
             outputs = self._process_request(infer_req)
             outputs = convert_to_binary(pd.DataFrame(outputs[0]))

@@ -217,7 +217,7 @@ def _run(
             inference_pipeline = load_pipeline(model_info, device="cpu")
             if isinstance(inference_pipeline, ForecastPipeline):
                 outputs = inference_pipeline.forecast(
-                    inputs, predict_length=predict_length, **inference_attrs
+                    inputs, predict_length=output_length, **inference_attrs
                 )
             elif isinstance(inference_pipeline, ClassificationPipeline):
                 outputs = inference_pipeline.classify(inputs)

@@ -246,7 +246,7 @@ def forecast(self, req: TForecastReq):
             data_getter=lambda r: r.inputData,
             deserializer=deserialize,
             extract_attrs=lambda r: {
-                "predict_length": r.outputLength,
+                "output_length": r.outputLength,
                 **(r.options or {}),
             },
             resp_cls=TForecastResp,

@@ -259,8 +259,7 @@ def inference(self, req: TInferenceReq):
             data_getter=lambda r: r.dataset,
             deserializer=deserialize,
             extract_attrs=lambda r: {
-                "window_interval": getattr(r.windowParams, "windowInterval", None),
-                "window_step": getattr(r.windowParams, "windowStep", None),
+                "output_length": int(r.inferenceAttributes.pop("outputLength", 96)),
                 **(r.inferenceAttributes or {}),
             },
             resp_cls=TInferenceResp,
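
The manager pops output_length from the extracted attributes and validates it against the configured ceiling before building the InferenceRequest. A simplified sketch of that flow, with the descriptor lookup and NumericalRangeException stubbed out:

# MAX_OUTPUT_LENGTH stands in for
# AINodeDescriptor().get_config().get_ain_inference_max_output_length(),
# and ValueError stands in for NumericalRangeException.
MAX_OUTPUT_LENGTH = 2880  # AINODE_INFERENCE_MAX_OUTPUT_LENGTH in constant.py

def resolve_output_length(inference_attrs: dict) -> int:
    # Default to 96 points when the caller does not specify outputLength.
    output_length = int(inference_attrs.pop("output_length", 96))
    if output_length > MAX_OUTPUT_LENGTH:
        raise ValueError(
            f"output_length must be in [1, {MAX_OUTPUT_LENGTH}], got {output_length}"
        )
    return output_length

print(resolve_output_length({"output_length": "48"}))  # 48
print(resolve_output_length({}))                       # 96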

iotdb-core/ainode/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -79,7 +79,7 @@ exclude = [
 python = ">=3.11.0,<3.14.0"
 
 # ---- DL / HF stack ----
-torch = ">=2.7.0"
+torch = "^2.7.1"
 torchmetrics = "^1.8.0"
 transformers = "==4.56.2"
 tokenizers = ">=0.22.0,<=0.23.0"
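
In Poetry's constraint syntax, ^2.7.1 expands to >=2.7.1,<3.0.0, so this change both raises the minimum torch patch version and caps the dependency below the next major release, which the previous open-ended >=2.7.0 did not.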

iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/ai/InferenceOperator.java

Lines changed: 2 additions & 1 deletion
@@ -245,7 +245,8 @@ private void submitInferenceTask() {
             .borrowClient(AINodeClientManager.AINODE_ID_PLACEHOLDER)) {
       return client.inference(
           new TInferenceReq(
-                  modelInferenceDescriptor.getModelId(), serde.serialize(inputTsBlock)));
+                  modelInferenceDescriptor.getModelId(), serde.serialize(inputTsBlock))
+              .setInferenceAttributes(modelInferenceDescriptor.getInferenceAttributes()));
     } catch (Exception e) {
       throw new ModelInferenceProcessException(e.getMessage());
     }

iotdb-protocol/thrift-ainode/src/main/thrift/ainode.thrift

Lines changed: 1 addition & 7 deletions
@@ -60,13 +60,7 @@ struct TRegisterModelResp {
 struct TInferenceReq {
   1: required string modelId
   2: required binary dataset
-  3: optional TWindowParams windowParams
-  4: optional map<string, string> inferenceAttributes
-}
-
-struct TWindowParams {
-  1: required i32 windowInterval
-  2: required i32 windowStep
+  3: optional map<string, string> inferenceAttributes
 }
 
 struct TInferenceResp {
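
With TWindowParams removed, the string-to-string inferenceAttributes map is the only side channel left on TInferenceReq; the manager's extract_attrs lambda (see inference_manager.py above) reads outputLength out of it. A sketch of that mapping in isolation (the attribute values here are illustrative):

# inferenceAttributes arrives as a Thrift map<string, string>; the manager pops
# "outputLength", coerces it to int, and forwards the remaining attributes.
inference_attributes = {"outputLength": "48", "generateTime": "true"}

attrs = {
    "output_length": int(inference_attributes.pop("outputLength", 96)),
    **(inference_attributes or {}),
}
print(attrs)  # {'output_length': 48, 'generateTime': 'true'}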
