@@ -603,35 +603,42 @@ def prepare_inputs_for_generation(
         **kwargs,
     ):
         # Omit tokens covered by past_key_values
+        past_length = 0
+        token_num = (
+            input_ids.shape[1] + self.config.input_token_len - 1
+        ) // self.config.input_token_len
+
         if past_key_values is not None:
             if isinstance(past_key_values, Cache):
                 past_length = past_key_values.get_seq_length()
             else:
                 past_length = past_key_values[0][0].shape[2]
 
+        if past_key_values is not None and past_length > 0:
             # Keep only the unprocessed tokens:
             # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
             # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
             # input)
-            if attention_mask is not None and attention_mask.shape[1] > (
-                input_ids.shape[1] // self.config.input_token_len
-            ):
+            if attention_mask is not None and attention_mask.shape[1] > token_num:
                 input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
             # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard
             # input_ids based on the past_length.
-            elif past_length < (input_ids.shape[1] // self.config.input_token_len):
-                input_ids = input_ids[:, past_length * self.config.input_token_len :]
+            elif past_length < token_num:
+                # TODO: Actually, we need to know the output_token_lens used in the last generation step.
+                # Sundial will pad the input when it is non-divisible, so we cannot use past_length to slice input_ids
+                input_ids = input_ids[:, -self.config.output_token_lens[0] :]
             # 3 - Otherwise (past_length >= (input_ids.shape[1] // self.config.input_token_len)), let's assume input_ids only has unprocessed tokens.
 
         position_ids = kwargs.get("position_ids", None)
         if attention_mask is not None and position_ids is None:
             # create position_ids on the fly for batch generation
             position_ids = attention_mask.long().cumsum(-1) - 1
             position_ids.masked_fill_(attention_mask == 0, 1)
-            if past_key_values:
-                position_ids = position_ids[
-                    :, -(input_ids.shape[1] // self.config.input_token_len) :
-                ]
+            if past_key_values is not None and past_length > 0:
+                token_num = (
+                    input_ids.shape[1] + self.config.input_token_len - 1
+                ) // self.config.input_token_len
+                position_ids = position_ids[:, -token_num :]
 
         # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
         if inputs_embeds is not None and past_key_values is None:
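
For context on the new `token_num` computation: the previous comparisons used floor division, `input_ids.shape[1] // self.config.input_token_len`, which undercounts by one patch whenever the input length is not a multiple of `input_token_len` and the model pads the remainder. The patch switches to ceiling division. A minimal sketch of the difference, using a made-up `input_token_len` of 16 and a 50-step input (both values are illustrative, not taken from the config):

```python
# Illustrative only -- hypothetical values, not part of the patch.
input_token_len = 16
seq_len = 50  # pretend input_ids.shape[1] == 50

floor_tokens = seq_len // input_token_len                         # 3: misses the padded partial patch
ceil_tokens = (seq_len + input_token_len - 1) // input_token_len  # 4: counts the padded partial patch

print(floor_tokens, ceil_tokens)  # 3 4
```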
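Likewise, the position_ids branch now trims to the same ceiling-divided token count, and only when the cache actually holds tokens (`past_length > 0`). A standalone sketch of how the mask-derived positions are built and trimmed, with a toy left-padded `attention_mask` and made-up lengths (all values are illustrative):

```python
import torch

# Illustrative only -- toy values, not taken from the model or its config.
attention_mask = torch.tensor([[0, 1, 1, 1, 1]])  # one left-padded sequence
input_token_len = 2
input_len = 3  # pretend input_ids.shape[1] == 3

position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)  # padded slots get a dummy position

token_num = (input_len + input_token_len - 1) // input_token_len  # ceil -> 2
position_ids = position_ids[:, -token_num:]  # keep positions for the unprocessed patches

print(position_ids)  # tensor([[2, 3]])
```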