
Commit c5c89f8
Standardize the dimensions of the input and output of forecast task to 3D
1 parent c4a9866
File tree: 7 files changed, +125 / -67 lines


iotdb-core/ainode/iotdb/ainode/core/inference/inference_request.py
10 additions, 9 deletions

@@ -42,7 +42,7 @@ def __init__(
         output_length: int = 96,
         **infer_kwargs,
     ):
-        if inputs.ndim == 1:
+        while inputs.ndim < 3:
             inputs = inputs.unsqueeze(0)
 
         self.req_id = req_id
@@ -54,15 +54,16 @@ def __init__(
         )
 
         self.batch_size = inputs.size(0)
+        self.variable_size = inputs.size(1)
         self.state = InferenceRequestState.WAITING
         self.cur_step_idx = 0  # Current write position in the output step index
         self.assigned_pool_id = -1  # The pool handling this request
         self.assigned_device_id = -1  # The device handling this request
 
         # Preallocate output buffer [batch_size, target_count, output_length]
         self.output_tensor = torch.zeros(
-            self.batch_size, output_length, device="cpu"
-        )  # shape: [self.batch_size, max_new_steps]
+            self.batch_size, self.variable_size, output_length, device="cpu"
+        )  # shape: [batch_size, target_count, predict_length]
 
     def mark_running(self):
         self.state = InferenceRequestState.RUNNING
@@ -77,26 +78,26 @@ def is_finished(self) -> bool:
         )
 
     def write_step_output(self, step_output: torch.Tensor):
-        if step_output.ndim == 1:
+        while step_output.ndim < 3:
             step_output = step_output.unsqueeze(0)
 
-        batch_size, step_size = step_output.shape
+        batch_size, variable_size, step_size = step_output.shape
         end_idx = self.cur_step_idx + step_size
 
         if end_idx > self.output_length:
-            self.output_tensor[:, self.cur_step_idx :] = step_output[
-                :, : self.output_length - self.cur_step_idx
+            self.output_tensor[:, :, self.cur_step_idx :] = step_output[
+                :, :, : self.output_length - self.cur_step_idx
             ]
             self.cur_step_idx = self.output_length
         else:
-            self.output_tensor[:, self.cur_step_idx : end_idx] = step_output
+            self.output_tensor[:, :, self.cur_step_idx : end_idx] = step_output
             self.cur_step_idx = end_idx
 
         if self.is_finished():
             self.mark_finished()
 
     def get_final_output(self) -> torch.Tensor:
-        return self.output_tensor[:, : self.cur_step_idx]
+        return self.output_tensor[:, :, : self.cur_step_idx]
 
 
 class InferenceRequestProxy:
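For context, a minimal sketch (not part of the commit; tensor sizes are illustrative) of the shape handling this file now enforces: any 1D or 2D input is promoted to the standard 3D layout, and step outputs are written into the preallocated 3D buffer along the last dimension.

import torch

# Promote a raw 1D series to the standardized 3D layout.
x = torch.randn(96)           # [sequence_length]
while x.ndim < 3:
    x = x.unsqueeze(0)        # -> [1, 96] -> [1, 1, 96]
assert x.shape == (1, 1, 96)  # [batch_size, target_count, sequence_length]

# Step outputs land in a preallocated buffer, indexed on the last dim.
output_tensor = torch.zeros(1, 1, 96)    # [batch_size, target_count, predict_length]
step_output = torch.randn(1, 1, 24)      # one generation step of 24 points
output_tensor[:, :, 0:24] = step_output  # cur_step_idx advances 0 -> 24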

iotdb-core/ainode/iotdb/ainode/core/inference/pipeline/basic_pipeline.py
22 additions, 7 deletions

@@ -20,6 +20,7 @@
 
 import torch
 
+from iotdb.ainode.core.exception import InferenceModelInternalException
 from iotdb.ainode.core.model.model_loader import load_model
 
 
@@ -37,7 +38,7 @@ def preprocess(self, inputs):
         raise NotImplementedError("preprocess not implemented")
 
     @abstractmethod
-    def postprocess(self, output: torch.Tensor):
+    def postprocess(self, outputs: torch.Tensor):
         """
         Post-process the outputs after the entire inference task.
         """
@@ -49,14 +50,28 @@ def __init__(self, model_info, **model_kwargs):
         super().__init__(model_info, model_kwargs=model_kwargs)
 
     def preprocess(self, inputs):
+        """
+        The inputs should be a 3D tensor: [batch_size, target_count, sequence_length].
+        """
+        if len(inputs.shape) != 3:
+            raise InferenceModelInternalException(
+                f"[Inference] Input must be: [batch_size, target_count, sequence_length], but receives {inputs.shape}"
+            )
         return inputs
 
     @abstractmethod
     def forecast(self, inputs, **infer_kwargs):
         pass
 
-    def postprocess(self, output: torch.Tensor):
-        return output
+    def postprocess(self, outputs: torch.Tensor):
+        """
+        The outputs should be a 3D tensor: [batch_size, target_count, predict_length].
+        """
+        if len(outputs.shape) != 3:
+            raise InferenceModelInternalException(
+                f"[Inference] Output must be: [batch_size, target_count, predict_length], but receives {outputs.shape}"
+            )
+        return outputs
 
 
 class ClassificationPipeline(BasicPipeline):
@@ -70,8 +85,8 @@ def preprocess(self, inputs):
     def classify(self, inputs, **kwargs):
         pass
 
-    def postprocess(self, output: torch.Tensor):
-        return output
+    def postprocess(self, outputs: torch.Tensor):
+        return outputs
 
 
 class ChatPipeline(BasicPipeline):
@@ -85,5 +100,5 @@ def preprocess(self, inputs):
     def chat(self, inputs, **kwargs):
         pass
 
-    def postprocess(self, output: torch.Tensor):
-        return output
+    def postprocess(self, outputs: torch.Tensor):
+        return outputs
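A hypothetical standalone check mirroring what ForecastPipeline.preprocess and postprocess now enforce at both ends of the pipeline; the helper name _require_3d and the ValueError are mine, not the repo's (the real code raises InferenceModelInternalException).

import torch

def _require_3d(t: torch.Tensor, layout: str) -> torch.Tensor:
    # Mirrors the new pipeline contract: reject anything that is not
    # [batch_size, target_count, length].
    if t.ndim != 3:
        raise ValueError(f"[Inference] Expected {layout}, but received {tuple(t.shape)}")
    return t

inputs = _require_3d(torch.randn(4, 2, 336), "[batch_size, target_count, sequence_length]")
outputs = _require_3d(torch.randn(4, 2, 96), "[batch_size, target_count, predict_length]")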

iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py
18 additions, 25 deletions

@@ -46,7 +46,10 @@
 from iotdb.ainode.core.manager.model_manager import ModelManager
 from iotdb.ainode.core.rpc.status import get_status
 from iotdb.ainode.core.util.gpu_mapping import get_available_devices
-from iotdb.ainode.core.util.serde import convert_to_binary
+from iotdb.ainode.core.util.serde import (
+    convert_tensor_to_tsblock,
+    convert_tsblock_to_tensor,
+)
 from iotdb.thrift.ainode.ttypes import (
     TForecastReq,
     TForecastResp,
@@ -58,7 +61,6 @@
     TUnloadModelReq,
 )
 from iotdb.thrift.common.ttypes import TSStatus
-from iotdb.tsfile.utils.tsblock_serde import deserialize
 
 logger = Logger()
 
@@ -170,23 +172,14 @@ def _run(
         self,
         req,
         data_getter,
-        deserializer,
         extract_attrs,
         resp_cls,
-        single_output: bool,
+        single_batch: bool,
     ):
         model_id = req.modelId
         try:
             raw = data_getter(req)
-            # full data deserialized from iotdb is composed of [timestampList, valueList, None, length], we only get valueList currently.
-            full_data = deserializer(raw)
-            # TODO: TSBlock -> Tensor codes should be unified
-            data = full_data[1][0]  # get valueList in ndarray
-            if data.dtype.byteorder not in ("=", "|"):
-                np_data = data.byteswap()
-                data = np_data.view(np_data.dtype.newbyteorder())
-            # the inputs should be on CPU before passing to the inference request
-            inputs = torch.tensor(data).unsqueeze(0).float().to("cpu")
+            inputs = convert_tsblock_to_tensor(raw)
 
             inference_attrs = extract_attrs(req)
             output_length = int(inference_attrs.pop("output_length", 96))
@@ -211,7 +204,6 @@ def _run(
                     output_length=output_length,
                 )
                 outputs = self._process_request(infer_req)
-                outputs = convert_to_binary(pd.DataFrame(outputs[0]))
             else:
                 model_info = self._model_manager.get_model_info(model_id)
                 inference_pipeline = load_pipeline(model_info, device="cpu")
@@ -228,45 +220,46 @@ def _run(
                     outputs = None
                     logger.error("[Inference] Unsupported pipeline type.")
                 outputs = inference_pipeline.postprocess(outputs)
-                outputs = convert_to_binary(pd.DataFrame(outputs[0]))
 
-            # construct response
-            status = get_status(TSStatusCode.SUCCESS_STATUS)
+            # Convert each batch's output tensor into a TsBlock
+            output_list = []
+            for batch_idx in range(outputs.size(0)):
+                output = convert_tensor_to_tsblock(outputs[batch_idx])
+                output_list.append(output)
 
-            if isinstance(outputs, list):
-                return resp_cls(status, outputs[0] if single_output else outputs)
-            return resp_cls(status, outputs if single_output else [outputs])
+            return resp_cls(
+                get_status(TSStatusCode.SUCCESS_STATUS),
+                output_list[0] if single_batch else output_list,
+            )
 
         except Exception as e:
             logger.error(e)
             status = get_status(TSStatusCode.AINODE_INTERNAL_ERROR, str(e))
-            empty = b"" if single_output else []
+            empty = b"" if single_batch else []
            return resp_cls(status, empty)
 
     def forecast(self, req: TForecastReq):
         return self._run(
             req,
             data_getter=lambda r: r.inputData,
-            deserializer=deserialize,
             extract_attrs=lambda r: {
                 "output_length": r.outputLength,
                 **(r.options or {}),
             },
             resp_cls=TForecastResp,
-            single_output=True,
+            single_batch=True,
         )
 
     def inference(self, req: TInferenceReq):
         return self._run(
             req,
             data_getter=lambda r: r.dataset,
-            deserializer=deserialize,
             extract_attrs=lambda r: {
                 "output_length": int(r.inferenceAttributes.pop("outputLength", 96)),
                 **(r.inferenceAttributes or {}),
             },
             resp_cls=TInferenceResp,
-            single_output=False,
+            single_batch=False,
         )
 
     def stop(self):
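A sketch of the new response assembly: postprocessed 3D outputs are sliced per batch item and each slice is serialized to one TsBlock. The serializer below is a stand-in of my own; the real convert_tensor_to_tsblock lives in iotdb.ainode.core.util.serde and encodes IoTDB's TsBlock format.

import torch

def to_tsblock_stub(t: torch.Tensor) -> bytes:
    # Stand-in for convert_tensor_to_tsblock; only the shapes matter here.
    return t.numpy().tobytes()

outputs = torch.randn(3, 1, 96)  # [batch_size, target_count, predict_length]
output_list = [to_tsblock_stub(outputs[i]) for i in range(outputs.size(0))]

single_batch = True              # forecast() returns one payload; inference() a list
payload = output_list[0] if single_batch else output_list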

iotdb-core/ainode/iotdb/ainode/core/model/sktime/pipeline_sktime.py
17 additions, 9 deletions

@@ -20,6 +20,7 @@
 import pandas as pd
 import torch
 
+from iotdb.ainode.core.exception import InferenceModelInternalException
 from iotdb.ainode.core.inference.pipeline.basic_pipeline import ForecastPipeline
 
 
@@ -29,6 +30,12 @@ def __init__(self, model_info, **model_kwargs):
         super().__init__(model_info, model_kwargs=model_kwargs)
 
     def preprocess(self, inputs):
+        inputs = super().preprocess(inputs)
+        if inputs.shape[1] != 1:
+            raise InferenceModelInternalException(
+                f"[Inference] Sktime model only supports single target, but receives {inputs.shape[1]} series."
+            )
+        inputs = inputs.squeeze(1)
         return inputs
 
     def forecast(self, inputs, **infer_kwargs):
@@ -47,21 +54,22 @@ def forecast(self, inputs, **infer_kwargs):
                 )
                 output = self.model.generate(series, predict_length=predict_length)
                 outputs.append(output)
-            output = np.array(outputs)
+            outputs = np.array(outputs)
         else:
             # Single sample: convert to Series
             if isinstance(inputs, torch.Tensor):
                 series = pd.Series(inputs.squeeze().cpu().numpy())
             else:
                 series = pd.Series(inputs.squeeze())
-            output = self.model.generate(series, predict_length=predict_length)
+            outputs = self.model.generate(series, predict_length=predict_length)
             # Add batch dimension if needed
-            if len(output.shape) == 1:
-                output = output[np.newaxis, :]
+            if len(outputs.shape) == 1:
+                outputs = outputs[np.newaxis, :]
 
-        return output
+        return outputs
 
-    def postprocess(self, output):
-        if isinstance(output, np.ndarray):
-            return torch.from_numpy(output).float()
-        return output
+    def postprocess(self, outputs):
+        if isinstance(outputs, np.ndarray):
+            outputs = torch.from_numpy(outputs).float()
+        outputs = super().postprocess(outputs.unsqueeze(1))
+        return outputs
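The squeeze/unsqueeze round trip that all three single-target pipelines now share, sketched with dummy tensors standing in for the model:

import numpy as np
import torch

inputs = torch.randn(2, 1, 336)      # [batch_size, target_count=1, sequence_length]
assert inputs.shape[1] == 1          # multi-target inputs raise in preprocess
model_in = inputs.squeeze(1)         # [batch_size, sequence_length] for the model

model_out = np.random.randn(2, 96)   # pretend the sktime model returned a 2D ndarray
restored = torch.from_numpy(model_out).float().unsqueeze(1)
assert restored.shape == (2, 1, 96)  # back to [batch_size, target_count, predict_length]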

iotdb-core/ainode/iotdb/ainode/core/model/sundial/pipeline_sundial.py
18 additions, 7 deletions

@@ -27,24 +27,35 @@ def __init__(self, model_info, **model_kwargs):
         super().__init__(model_info, model_kwargs=model_kwargs)
 
     def preprocess(self, inputs):
-        if len(inputs.shape) != 2:
+        """
+        The inputs should be 3D, but Sundial only supports a 2D tensor: [batch_size, sequence_length],
+        so we squeeze out the target_count dimension.
+        """
+        inputs = super().preprocess(inputs)
+        if inputs.shape[1] != 1:
             raise InferenceModelInternalException(
-                f"[Inference] Input shape must be: [batch_size, seq_len], but receives {inputs.shape}"
+                f"[Inference] Sundial model only supports single target, but receives {inputs.shape[1]} series."
             )
+        inputs = inputs.squeeze(1)
         return inputs
 
     def forecast(self, inputs, **infer_kwargs):
         predict_length = infer_kwargs.get("predict_length", 96)
         num_samples = infer_kwargs.get("num_samples", 10)
         revin = infer_kwargs.get("revin", True)
 
-        output = self.model.generate(
+        outputs = self.model.generate(
             inputs,
             max_new_tokens=predict_length,
             num_samples=num_samples,
             revin=revin,
         )
-        return output
-
-    def postprocess(self, output: torch.Tensor):
-        return output.mean(dim=1)
+        return outputs
+
+    def postprocess(self, outputs: torch.Tensor):
+        """
+        The outputs should be 3D, so we take the mean across the num_samples dimension and expand dims.
+        """
+        outputs = outputs.mean(dim=1).unsqueeze(1)
+        outputs = super().postprocess(outputs)
+        return outputs
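Sundial's generate is assumed here, based on the mean(dim=1) in postprocess, to emit one trajectory per sample, i.e. [batch_size, num_samples, predict_length]; the new postprocess collapses the sample dimension and reinstates target_count:

import torch

raw = torch.randn(2, 10, 96)                 # [batch_size, num_samples, predict_length]
standardized = raw.mean(dim=1).unsqueeze(1)  # average samples, restore target_count
assert standardized.shape == (2, 1, 96)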

iotdb-core/ainode/iotdb/ainode/core/model/timer_xl/pipeline_timer.py
19 additions, 7 deletions

@@ -27,18 +27,30 @@ def __init__(self, model_info, **model_kwargs):
         super().__init__(model_info, model_kwargs=model_kwargs)
 
     def preprocess(self, inputs):
-        if len(inputs.shape) != 2:
+        """
+        The inputs should be 3D, but Timer-XL only supports a 2D tensor: [batch_size, sequence_length],
+        so we squeeze out the target_count dimension.
+        """
+        inputs = super().preprocess(inputs)
+        if inputs.shape[1] != 1:
             raise InferenceModelInternalException(
-                f"[Inference] Input shape must be: [batch_size, seq_len], but receives {inputs.shape}"
+                f"[Inference] Timer-XL model only supports single target, but receives {inputs.shape[1]} series."
             )
+        inputs = inputs.squeeze(1)
         return inputs
 
     def forecast(self, inputs, **infer_kwargs):
         predict_length = infer_kwargs.get("predict_length", 96)
         revin = infer_kwargs.get("revin", True)
 
-        output = self.model.generate(inputs, max_new_tokens=predict_length, revin=revin)
-        return output
-
-    def postprocess(self, output: torch.Tensor):
-        return output
+        outputs = self.model.generate(
+            inputs, max_new_tokens=predict_length, revin=revin
+        )
+        return outputs
+
+    def postprocess(self, outputs: torch.Tensor):
+        """
+        The outputs should be 3D, so we expand dims before the base-class check.
+        """
+        outputs = super().postprocess(outputs.unsqueeze(1))
+        return outputs
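Putting it together, the per-stage shape contract after this commit, sketched with dummy tensors standing in for the Timer-XL model call:

import torch

batch = torch.randn(4, 1, 672)    # pipeline input: [batch_size, target_count, seq_len]
model_in = batch.squeeze(1)       # preprocess: what the model's generate() consumes
model_out = torch.randn(4, 96)    # stand-in for generate(max_new_tokens=96)
final = model_out.unsqueeze(1)    # postprocess: restore target_count
assert final.shape == (4, 1, 96)  # [batch_size, target_count, predict_length]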
