Merge pull request #92 from gleize/fix-memory-leak

shepnerd · web-flow · commit c32495b09a32 · 2024-04-01T07:26:02.000+08:00
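Fix memory leak: in seven forward() implementations, replace the in-place slice assignment `tmp_x[1:] = tmp_x[1:] + tmp_feats` with an out-of-place `tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0)`.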
diff --git a/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/evl_utils/clip_vit_fusion.py b/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/evl_utils/clip_vit_fusion.py
@@ -249,7 +249,8 @@ def forward(self, x, mode='video', return_all_feats=False):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:]
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats).view(N, C, T_down, L - 1).permute(3, 0, 2, 1)
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # enhancer
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)
diff --git a/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/evl_utils/clip_vit_only_global.py b/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/evl_utils/clip_vit_only_global.py
@@ -224,7 +224,8 @@ def forward(self, x, mode='video', return_all_feats=False):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:]
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats).view(N, C, T_down, L - 1).permute(3, 0, 2, 1)
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # enhancer
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)
diff --git a/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/evl_utils/clip_vit_fusion.py b/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/evl_utils/clip_vit_fusion.py
@@ -249,7 +249,8 @@ def forward(self, x, mode='video', return_all_feats=False):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:]
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats).view(N, C, T_down, L - 1).permute(3, 0, 2, 1)
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # enhancer
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)
diff --git a/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/evl_utils/clip_vit_only_global.py b/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/evl_utils/clip_vit_only_global.py
@@ -224,7 +224,8 @@ def forward(self, x, mode='video', return_all_feats=False):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:]
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats).view(N, C, T_down, L - 1).permute(3, 0, 2, 1)
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # enhancer
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)
diff --git a/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/clip_utils/utils/clip_vit_only_global.py b/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/clip_utils/utils/clip_vit_only_global.py
@@ -224,7 +224,8 @@ def forward(self, x, mode='video', return_all_feats=False):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:].clone()
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats).view(N, C, T_down, L - 1).permute(3, 0, 2, 1)
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # enhancer
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)
diff --git a/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/clip_utils/utils/clip_vit_only_global.py b/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/clip_utils/utils/clip_vit_only_global.py
@@ -224,7 +224,8 @@ def forward(self, x, mode='video', return_all_feats=False):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:]
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats).view(N, C, T_down, L - 1).permute(3, 0, 2, 1)
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # enhancer
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)
diff --git a/InternVideo1/Pretrain/UniFormerV2/slowfast/models/uniformerv2_model.py b/InternVideo1/Pretrain/UniFormerV2/slowfast/models/uniformerv2_model.py
@@ -261,7 +261,8 @@ def forward(self, x):
                 _, tmp_feats = tmp_x[:1], tmp_x[1:]
                 tmp_feats = tmp_feats.permute(1, 3, 2, 0).reshape(N, C, T_down, H, W)
                 tmp_feats = self.dpe[j](tmp_feats.clone()).view(N, C, T_down, L - 1).permute(3, 0, 2, 1).contiguous()
-                tmp_x[1:] = tmp_x[1:] + tmp_feats
+                # tmp_x[1:] = tmp_x[1:] + tmp_feats # memory leak        
+                tmp_x = torch.cat([tmp_x[:1], tmp_x[1:] + tmp_feats], dim=0) # no memory leak
                 # global block
                 tmp_x = tmp_x.permute(2, 0, 1, 3).flatten(0, 1)  # T * L, N, C
                 cls_token = self.dec[j](cls_token, tmp_x)