@@ -957,7 +957,9 @@ def get_3d_rotary_pos_embed_allegro(
957957 return freqs_t , freqs_h , freqs_w , grid_t , grid_h , grid_w
958958
959959
960- def get_2d_rotary_pos_embed (embed_dim , crops_coords , grid_size , use_real = True , device : Optional [torch .device ] = None ):
960+ def get_2d_rotary_pos_embed (
961+ embed_dim , crops_coords , grid_size , use_real = True , device : Optional [torch .device ] = None , output_type : str = "np"
962+ ):
961963 """
962964 RoPE for image tokens with 2d structure.
963965
@@ -976,6 +978,19 @@ def get_2d_rotary_pos_embed(embed_dim, crops_coords, grid_size, use_real=True, d
976978 Returns:
977979 `torch.Tensor`: positional embedding with shape `( grid_size * grid_size, embed_dim/2)`.
978980 """
981+ if output_type == "np" :
982+ deprecation_message = (
983+ "`get_2d_sincos_pos_embed` uses `torch` and supports `device`."
984+ " `from_numpy` is no longer required."
985+ " Pass `output_type='pt' to use the new version now."
986+ )
987+ deprecate ("output_type=='np'" , "0.33.0" , deprecation_message , standard_warn = False )
988+ return _get_2d_rotary_pos_embed_np (
989+ embed_dim = embed_dim ,
990+ crops_coords = crops_coords ,
991+ grid_size = grid_size ,
992+ use_real = use_real ,
993+ )
979994 start , stop = crops_coords
980995 # scale end by (steps−1)/steps matches np.linspace(..., endpoint=False)
981996 grid_h = torch .linspace (
@@ -992,6 +1007,34 @@ def get_2d_rotary_pos_embed(embed_dim, crops_coords, grid_size, use_real=True, d
9921007 return pos_embed
9931008
9941009
def _get_2d_rotary_pos_embed_np(embed_dim, crops_coords, grid_size, use_real=True):
    """
    NumPy-based RoPE for image tokens with a 2D structure (legacy `output_type="np"` path).

    Args:
        embed_dim: (`int`):
            The embedding dimension size
        crops_coords (`Tuple[int]`)
            The top-left and bottom-right coordinates of the crop.
        grid_size (`Tuple[int]`):
            The grid size of the positional embedding.
        use_real (`bool`):
            If True, return real part and imaginary part separately. Otherwise, return complex numbers.

    Returns:
        `torch.Tensor`: positional embedding with shape `( grid_size * grid_size, embed_dim/2)`.
    """
    top_left, bottom_right = crops_coords
    # Sample coordinates over the crop, excluding the endpoint.
    ys = np.linspace(top_left[0], bottom_right[0], grid_size[0], endpoint=False, dtype=np.float32)
    xs = np.linspace(top_left[1], bottom_right[1], grid_size[1], endpoint=False, dtype=np.float32)
    # Default 'xy' meshgrid indexing: w varies along the last axis,
    # so the stacked result has shape (2, grid_size[0], grid_size[1]).
    mesh = np.stack(np.meshgrid(xs, ys), axis=0)
    # Insert a singleton leading axis per coordinate, as expected by the grid helper.
    mesh = mesh.reshape(2, 1, *mesh.shape[1:])
    return get_2d_rotary_pos_embed_from_grid(embed_dim, mesh, use_real=use_real)
1036+
1037+
9951038def get_2d_rotary_pos_embed_from_grid (embed_dim , grid , use_real = False ):
9961039 """
9971040 Get 2D RoPE from grid.
0 commit comments