diff --git a/paddleformers/transformers/conversion_utils.py b/paddleformers/transformers/conversion_utils.py index 4650111a63..8142463420 100644 --- a/paddleformers/transformers/conversion_utils.py +++ b/paddleformers/transformers/conversion_utils.py @@ -745,7 +745,7 @@ def fn(x, is_column=True, transpose=False, is_old_qkv=False, is_naive_2fuse=Fals return None if transpose: if isinstance(x, paddle.Tensor): - x = paddle.transpose(x, [1, 0]) + x = paddle.transpose(x, [1, 0]).contiguous() else: x = np.transpose(x, [1, 0]) if is_old_qkv: @@ -1252,7 +1252,7 @@ def convert_transpose_selected_weights(state_dict: dict, transpose_weight_keys: continue for trans_key in transpose_weight_keys: if re.search(f"\.{trans_key}\.weight$", key) or re.fullmatch(f"^{trans_key}\.weight$", key): - state_dict[key] = state_dict.pop(key).transpose([-1, -2]) + state_dict[key] = state_dict.pop(key).transpose([-1, -2]).contiguous() return state_dict @classmethod diff --git a/paddleformers/transformers/model_utils.py b/paddleformers/transformers/model_utils.py index 540146aea1..fa7b2c3a15 100644 --- a/paddleformers/transformers/model_utils.py +++ b/paddleformers/transformers/model_utils.py @@ -397,7 +397,7 @@ def _is_need_transpose(key): def _transpose_hf_weight(key, weight): if _is_need_transpose(key): - return weight.transpose([-1, -2]) + return weight.transpose([-1, -2]).contiguous() return weight part_state_dict = {}