Skip to content

Commit ffb35fe

Browse files
authored
Fix in-place modification of user-input in SAM2 embed boxes (huggingface#42173)
* Do not modify boxes tensor in-place
1 parent 1fd63dd commit ffb35fe

File tree

5 files changed: +5 -5 lines changed

5 files changed: +5 -5 lines changed

src/transformers/models/edgetam/modeling_edgetam.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ def _embed_points(self, points: torch.Tensor, labels: torch.Tensor, pad: bool) -
600 | 600 |
601 | 601 |   def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
602 | 602 |   """Embeds box prompts."""
603 |     | - boxes += 0.5 # Shift to center of pixel
    | 603 | + boxes = boxes + 0.5 # Shift to center of pixel
604 | 604 |   coords = boxes.view(*boxes.shape[:2], 2, 2)
605 | 605 |   # add padding point for consistency with the original implementation
606 | 606 |   coords = torch.nn.functional.pad(coords, (0, 0, 0, 1), mode="constant", value=0)

src/transformers/models/edgetam_video/modeling_edgetam_video.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1644,7 +1644,7 @@ def _embed_points(self, points: torch.Tensor, labels: torch.Tensor, pad: bool) -
1644 | 1644 |
1645 | 1645 |   def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
1646 | 1646 |   """Embeds box prompts."""
1647 |      | - boxes += 0.5 # Shift to center of pixel
     | 1647 | + boxes = boxes + 0.5 # Shift to center of pixel
1648 | 1648 |   coords = boxes.view(*boxes.shape[:2], 2, 2)
1649 | 1649 |   # add padding point for consistency with the original implementation
1650 | 1650 |   coords = torch.nn.functional.pad(coords, (0, 0, 0, 1), mode="constant", value=0)

src/transformers/models/sam2/modeling_sam2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -792,7 +792,7 @@ def _embed_points(self, points: torch.Tensor, labels: torch.Tensor, pad: bool) -
792 | 792 |
793 | 793 |   def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
794 | 794 |   """Embeds box prompts."""
795 |     | - boxes += 0.5 # Shift to center of pixel
    | 795 | + boxes = boxes + 0.5 # Shift to center of pixel
796 | 796 |   coords = boxes.view(*boxes.shape[:2], 2, 2)
797 | 797 |   # add padding point for consistency with the original implementation
798 | 798 |   coords = torch.nn.functional.pad(coords, (0, 0, 0, 1), mode="constant", value=0)

src/transformers/models/sam2/modular_sam2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -890,7 +890,7 @@ def _embed_points(self, points: torch.Tensor, labels: torch.Tensor, pad: bool) -
890 | 890 |
891 | 891 |   def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
892 | 892 |   """Embeds box prompts."""
893 |     | - boxes += 0.5 # Shift to center of pixel
    | 893 | + boxes = boxes + 0.5 # Shift to center of pixel
894 | 894 |   coords = boxes.view(*boxes.shape[:2], 2, 2)
895 | 895 |   # add padding point for consistency with the original implementation
896 | 896 |   coords = torch.nn.functional.pad(coords, (0, 0, 0, 1), mode="constant", value=0)

src/transformers/models/sam2_video/modeling_sam2_video.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1227,7 +1227,7 @@ def _embed_points(self, points: torch.Tensor, labels: torch.Tensor, pad: bool) -
1227 | 1227 |
1228 | 1228 |   def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
1229 | 1229 |   """Embeds box prompts."""
1230 |      | - boxes += 0.5 # Shift to center of pixel
     | 1230 | + boxes = boxes + 0.5 # Shift to center of pixel
1231 | 1231 |   coords = boxes.view(*boxes.shape[:2], 2, 2)
1232 | 1232 |   # add padding point for consistency with the original implementation
1233 | 1233 |   coords = torch.nn.functional.pad(coords, (0, 0, 0, 1), mode="constant", value=0)

0 commit comments

Comments
 (0)