
Commit fe16a22

Author: Liu Zhengyun (committed)
redefine parameter input to targets and fix some bugs

1 parent 48bd352

File tree

5 files changed: +180, -207 lines

iotdb-core/ainode/iotdb/ainode/core/model/sundial/modeling_sundial.py

Lines changed: 9 additions & 4 deletions
@@ -610,7 +610,11 @@ def prepare_inputs_for_generation(
             if attention_mask is not None and attention_mask.shape[1] > (
                 input_ids.shape[1] // self.config.input_token_len
             ):
-                input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
+                input_ids = input_ids[
+                    :,
+                    -(attention_mask.shape[1] - past_length)
+                    * self.config.input_token_len :,
+                ]
             # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard
             # input_ids based on the past_length.
             elif past_length < (input_ids.shape[1] // self.config.input_token_len):
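The fix multiplies the slice bound by self.config.input_token_len. The surrounding code suggests that attention_mask and past_length count patch tokens while input_ids indexes raw timesteps (each token covering input_token_len timesteps), so the old slice kept only (attention_mask.shape[1] - past_length) timesteps instead of that many whole tokens. A minimal sketch of the mismatch; the names mirror the diff, but all shapes and values here are made-up illustrations, not taken from the repository:

import torch

input_token_len = 16   # timesteps covered by one patch token (assumed value)
past_length = 4        # tokens already cached in past_key_values
attention_mask = torch.ones(1, 7)                 # one entry per token
input_ids = torch.randn(1, 7 * input_token_len)   # one entry per timestep

new_tokens = attention_mask.shape[1] - past_length   # 3 unprocessed tokens

old = input_ids[:, -new_tokens:]                     # old slice: only 3 timesteps survive
new = input_ids[:, -new_tokens * input_token_len:]   # fixed slice: 3 whole tokens survive

assert old.shape[1] == 3
assert new.shape[1] == 3 * input_token_len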
@@ -623,9 +627,10 @@ def prepare_inputs_for_generation(
             position_ids = attention_mask.long().cumsum(-1) - 1
             position_ids.masked_fill_(attention_mask == 0, 1)
             if past_key_values:
-                position_ids = position_ids[
-                    :, -(input_ids.shape[1] // self.config.input_token_len) :
-                ]
+                token_num = (
+                    input_ids.shape[1] + self.config.input_token_len - 1
+                ) // self.config.input_token_len
+                position_ids = position_ids[:, -token_num:]
 
         # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
         if inputs_embeds is not None and past_key_values is None:
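The second hunk swaps a floor division for a ceiling division when deciding how many position ids to keep. Presumably this preserves a position id for a trailing, partially filled token whenever input_ids.shape[1] is not an exact multiple of input_token_len; under the old floor division that last partial token would lose its position. A standalone comparison with assumed lengths:

input_token_len = 16   # assumed value, as above

for series_len in (32, 33, 48):
    floor_tokens = series_len // input_token_len                       # old behaviour
    token_num = (series_len + input_token_len - 1) // input_token_len  # new, ceiling
    print(series_len, floor_tokens, token_num)

# 32 -> 2 2 and 48 -> 3 3 (exact multiples are unchanged);
# 33 -> 2 vs 3: the one leftover timestep now counts as a token of its own.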

iotdb-core/ainode/iotdb/ainode/core/model/timer_xl/modeling_timer.py

Lines changed: 5 additions & 1 deletion
@@ -603,7 +603,11 @@ def prepare_inputs_for_generation(
             if attention_mask is not None and attention_mask.shape[1] > (
                 input_ids.shape[1] // self.config.input_token_len
             ):
-                input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
+                input_ids = input_ids[
+                    :,
+                    -(attention_mask.shape[1] - past_length)
+                    * self.config.input_token_len :,
+                ]
             # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard
             # input_ids based on the past_length.
             elif past_length < (input_ids.shape[1] // self.config.input_token_len):
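The modeling_timer.py hunk is the same input_ids rescaling applied to Timer-XL's prepare_inputs_for_generation. Where both fixes are present (as in modeling_sundial.py above), the rescaled input_ids slice and the ceiling-based position_ids slice agree on the number of tokens fed forward; a rough composed check, again with assumed values rather than anything from the commit:

import torch

input_token_len = 16
past_length = 4
attention_mask = torch.ones(1, 7, dtype=torch.long)   # 7 tokens seen so far
input_ids = torch.randn(1, 7 * input_token_len)

# fixed input_ids slice: keep the 3 unprocessed tokens' worth of timesteps
input_ids = input_ids[
    :, -(attention_mask.shape[1] - past_length) * input_token_len:
]

# fixed position_ids slice: ceiling division over the remaining timesteps
position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)
token_num = (input_ids.shape[1] + input_token_len - 1) // input_token_len
position_ids = position_ids[:, -token_num:]

assert position_ids.shape[1] == input_ids.shape[1] // input_token_len == 3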

0 commit comments
