@@ -97,11 +97,14 @@ def apply_background_color(image: Image.Image, mask_image: Image.Image,
                            background_color: str = "#222222") -> Image.Image:
     rgba_image = image.copy().convert('RGBA')
     rgba_image.putalpha(mask_image.convert('L'))
+
     if background == "Color":
         def hex_to_rgba(hex_color):
             hex_color = hex_color.lstrip('#')
             r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
             return (r, g, b, 255)
+        params = {"background_color": background_color}
+        background_color = params.get("background_color", "#222222")
         rgba = hex_to_rgba(background_color)
         bg_image = Image.new('RGBA', image.size, rgba)
         composite_image = Image.alpha_composite(bg_image, rgba_image)
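
For reference, the compositing path in this hunk can be exercised on its own. The sketch below is a minimal, self-contained version of the same idea (the helper name composite_over_color is illustrative and not part of this repository): the mask's L channel becomes the foreground alpha, and a solid background of the requested hex color is composited underneath.

from PIL import Image

def composite_over_color(image: Image.Image, mask: Image.Image,
                         hex_color: str = "#222222") -> Image.Image:
    # Parse "#RRGGBB" into an opaque RGBA tuple.
    hex_color = hex_color.lstrip('#')
    rgba = tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4)) + (255,)
    # The mask drives the foreground alpha; composite it over the solid color.
    fg = image.convert('RGBA')
    fg.putalpha(mask.convert('L'))
    bg = Image.new('RGBA', image.size, rgba)
    return Image.alpha_composite(bg, fg)
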
@@ -146,7 +149,7 @@ def INPUT_TYPES(cls):
                 "dino_model": (list(DINO_MODELS.keys()),),
             },
             "optional": {
-                "threshold": ("FLOAT", {"default": 0.30, "min": 0.05, "max": 0.95, "step": 0.01, "tooltip": tooltips["threshold"]}),
+                "threshold": ("FLOAT", {"default": 0.35, "min": 0.05, "max": 0.95, "step": 0.01, "tooltip": tooltips["threshold"]}),
                 "mask_blur": ("INT", {"default": 0, "min": 0, "max": 64, "step": 1, "tooltip": tooltips["mask_blur"]}),
                 "mask_offset": ("INT", {"default": 0, "min": -64, "max": 64, "step": 1, "tooltip": tooltips["mask_offset"]}),
                 "invert_output": ("BOOLEAN", {"default": False, "tooltip": tooltips["invert_output"]}),
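
The only functional change in this hunk is the default detection threshold, raised from 0.30 to 0.35. That value feeds the box filter later in this commit (logits.max(dim=1)[0] > threshold), so a higher default simply discards more low-confidence GroundingDINO proposals. A small sketch with synthetic scores (not real model output) illustrates the effect:

import torch

# Synthetic per-box token scores; real values come from pred_logits.sigmoid().
logits = torch.tensor([[0.10, 0.42],
                       [0.05, 0.33],
                       [0.20, 0.28]])
boxes = torch.rand(3, 4)  # cx, cy, w, h in [0, 1]

for threshold in (0.30, 0.35):
    keep = logits.max(dim=1)[0] > threshold
    print(f"threshold={threshold}: {int(keep.sum())} boxes kept")
# threshold=0.30 keeps two of the three proposals, threshold=0.35 keeps only one.
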
@@ -167,105 +170,150 @@ def __init__(self):
     def segment_v2(self, image, prompt, sam_model, dino_model, threshold=0.30,
                    mask_blur=0, mask_offset=0, background="Alpha",
                    background_color="#222222", invert_output=False):
-        img_pil = tensor2pil(image[0]) if image.ndim == 4 else tensor2pil(image)
-        img_np = np.array(img_pil.convert("RGB"))
         device = "cuda" if torch.cuda.is_available() else "cpu"

-        # Load GroundingDINO config and weights
-        dino_info = DINO_MODELS[dino_model]
-        config_path = get_or_download_model_file(dino_info["config_filename"], dino_info["config_url"], "grounding-dino")
-        weights_path = get_or_download_model_file(dino_info["model_filename"], dino_info["model_url"], "grounding-dino")
+        # Process batched images
+        batch_size = image.shape[0] if len(image.shape) == 4 else 1
+        if len(image.shape) == 3:
+            image = image.unsqueeze(0)
+
+        result_images = []
+        result_masks = []
+        result_mask_images = []
+
+        for b in range(batch_size):
+            img_pil = tensor2pil(image[b])
+            img_np = np.array(img_pil.convert("RGB"))

-        # Load and cache GroundingDINO model
-        dino_key = (config_path, weights_path, device)
-        if dino_key not in self.dino_model_cache:
-            args = SLConfig.fromfile(config_path)
-            model = build_model(args)
-            checkpoint = torch.load(weights_path, map_location="cpu")
-            model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
-            model.eval()
-            model.to(device)
-            self.dino_model_cache[dino_key] = model
-        dino = self.dino_model_cache[dino_key]
+            # Load GroundingDINO config and weights
+            dino_info = DINO_MODELS[dino_model]
+            config_path = get_or_download_model_file(dino_info["config_filename"], dino_info["config_url"], "grounding-dino")
+            weights_path = get_or_download_model_file(dino_info["model_filename"], dino_info["model_url"], "grounding-dino")

-        # Preprocess image for DINO
-        from groundingdino.datasets.transforms import Compose, RandomResize, ToTensor, Normalize
-        transform = Compose([
-            RandomResize([800], max_size=1333),
-            ToTensor(),
-            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-        ])
-        image_tensor, _ = transform(img_pil.convert("RGB"), None)
-        image_tensor = image_tensor.unsqueeze(0).to(device)
+            # Load and cache GroundingDINO model
+            dino_key = (config_path, weights_path, device)
+            if dino_key not in self.dino_model_cache:
+                args = SLConfig.fromfile(config_path)
+                model = build_model(args)
+                checkpoint = torch.load(weights_path, map_location="cpu")
+                model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
+                model.eval()
+                model.to(device)
+                self.dino_model_cache[dino_key] = model
+            dino = self.dino_model_cache[dino_key]

-        # Prepare text prompt
-        text_prompt = prompt if prompt.endswith(".") else prompt + "."
+            # Download/check SAM weights
+            sam_info = SAM_MODELS[sam_model]
+            sam_ckpt_path = get_or_download_model_file(sam_info["filename"], sam_info["model_url"], "SAM")

-        # Forward pass
-        with torch.no_grad():
-            outputs = dino(image_tensor, captions=[text_prompt])
-            logits = outputs["pred_logits"].sigmoid()[0]
-            boxes = outputs["pred_boxes"][0]
+            # Load SAM model (cache to avoid reloading)
+            sam_key = (sam_info["model_type"], sam_ckpt_path, device)
+            if sam_key not in self.sam_model_cache:
+                try:
+                    sam = sam_model_registry[sam_info["model_type"]](checkpoint=sam_ckpt_path)
+                    sam.to(device)
+                    self.sam_model_cache[sam_key] = SamPredictor(sam)
+                except RuntimeError as e:
+                    if "Unexpected key(s) in state_dict" in str(e):
+                        print("Warning: SAM model loading issue detected, please try using the SegmentV1 node instead")
+                        print(f"Error details: {str(e)}")
+                        width, height = img_pil.size
+                        empty_mask = torch.zeros((1, height, width), dtype=torch.float32, device="cpu")
+                        empty_mask_rgb = empty_mask.reshape((-1, 1, height, width)).movedim(1, -1).expand(-1, -1, -1, 3)
+                        result_image = apply_background_color(img_pil, Image.fromarray((empty_mask[0].numpy() * 255).astype(np.uint8)), background, background_color)
+                        result_images.append(pil2tensor(result_image))
+                        result_masks.append(empty_mask)
+                        result_mask_images.append(empty_mask_rgb)
+                        continue
+                    else:
+                        raise e
+            predictor = self.sam_model_cache[sam_key]

-        # Filter boxes by threshold
-        filt_mask = logits.max(dim=1)[0] > threshold
-        boxes_filt = boxes[filt_mask]
-        if boxes_filt.shape[0] == 0:
-            width, height = img_pil.size
-            empty_mask = torch.zeros((1, height, width), dtype=torch.float32, device="cpu")
-            empty_mask_rgb = empty_mask.reshape((-1, 1, height, width)).movedim(1, -1).expand(-1, -1, -1, 3)
-            result_image = apply_background_color(img_pil, Image.fromarray((empty_mask[0].numpy() * 255).astype(np.uint8)), background, background_color)
-            return (pil2tensor(result_image), empty_mask, empty_mask_rgb)
+            # Preprocess image for DINO
+            from groundingdino.datasets.transforms import Compose, RandomResize, ToTensor, Normalize
+            transform = Compose([
+                RandomResize([800], max_size=1333),
+                ToTensor(),
+                Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+            ])
+            image_tensor, _ = transform(img_pil.convert("RGB"), None)
+            image_tensor = image_tensor.unsqueeze(0).to(device)
+
+            # Prepare text prompt
+            text_prompt = prompt if prompt.endswith(".") else prompt + "."
+
+            # Forward pass
+            with torch.no_grad():
+                outputs = dino(image_tensor, captions=[text_prompt])
+                logits = outputs["pred_logits"].sigmoid()[0]
+                boxes = outputs["pred_boxes"][0]

-        # Convert boxes to xyxy
-        H, W = img_pil.size[1], img_pil.size[0]
-        boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes_filt)
-        boxes_xyxy = boxes_xyxy * torch.tensor([W, H, W, H], dtype=torch.float32, device=boxes_xyxy.device)
-        boxes_xyxy = boxes_xyxy.cpu().numpy()
+            # Filter boxes by threshold
+            filt_mask = logits.max(dim=1)[0] > threshold
+            boxes_filt = boxes[filt_mask]
+
+            # Handle case with no detected boxes
+            if boxes_filt.shape[0] == 0:
+                width, height = img_pil.size
+                empty_mask = torch.zeros((1, height, width), dtype=torch.float32, device="cpu")
+                empty_mask_rgb = empty_mask.reshape((-1, 1, height, width)).movedim(1, -1).expand(-1, -1, -1, 3)
+                result_image = apply_background_color(img_pil, Image.fromarray((empty_mask[0].numpy() * 255).astype(np.uint8)), background, background_color)
+                result_images.append(pil2tensor(result_image))
+                result_masks.append(empty_mask)
+                result_mask_images.append(empty_mask_rgb)
+                continue

-        # Download/check SAM weights
-        sam_info = SAM_MODELS[sam_model]
-        sam_ckpt_path = get_or_download_model_file(sam_info["filename"], sam_info["model_url"], "SAM")
+            # Convert boxes to xyxy
+            H, W = img_pil.size[1], img_pil.size[0]
+            boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes_filt)
+            boxes_xyxy = boxes_xyxy * torch.tensor([W, H, W, H], dtype=torch.float32, device=boxes_xyxy.device)
+            boxes_xyxy = boxes_xyxy.cpu().numpy()

-        # Load SAM model (cache to avoid reloading)
-        sam_key = (sam_info["model_type"], sam_ckpt_path, device)
-        if sam_key not in self.sam_model_cache:
-            sam = sam_model_registry[sam_info["model_type"]](checkpoint=sam_ckpt_path)
-            sam.to(device)
-            self.sam_model_cache[sam_key] = SamPredictor(sam)
-        predictor = self.sam_model_cache[sam_key]
+            # Use SAM to get masks for each box
+            predictor.set_image(img_np)
+            boxes_tensor = torch.tensor(boxes_xyxy, dtype=torch.float32, device=predictor.device)
+            transformed_boxes = predictor.transform.apply_boxes_torch(boxes_tensor, img_np.shape[:2])
+            masks, _, _ = predictor.predict_torch(
+                point_coords=None,
+                point_labels=None,
+                boxes=transformed_boxes,
+                multimask_output=False
+            )

-        # Use SAM to get masks for each box
-        predictor.set_image(img_np)
-        boxes_tensor = torch.tensor(boxes_xyxy, dtype=torch.float32, device=predictor.device)
-        transformed_boxes = predictor.transform.apply_boxes_torch(boxes_tensor, img_np.shape[:2])
-        masks, _, _ = predictor.predict_torch(
-            point_coords=None,
-            point_labels=None,
-            boxes=transformed_boxes,
-            multimask_output=False
-        )
-        # Process mask following the original implementation
-        print(f"Mask shape before processing: {masks.shape}")
-        # Combine all masks into one
-        combined_mask = torch.max(masks, dim=0)[0]  # Take maximum across all masks
-        mask = combined_mask.float().cpu().numpy()
-        print(f"Mask shape after processing: {mask.shape}")
-        # Squeeze out the extra dimension to get a 2D array
-        mask = mask.squeeze(0)
-        print(f"Final mask shape: {mask.shape}")
-        mask = (mask * 255).astype(np.uint8)
-        mask_pil = Image.fromarray(mask, mode="L")
+            # Combine all masks into one
+            combined_mask = torch.max(masks, dim=0)[0]  # Take maximum across all masks
+            mask = combined_mask.float().cpu().numpy()
+            mask = mask.squeeze(0)
+            mask = (mask * 255).astype(np.uint8)
+            mask_pil = Image.fromarray(mask, mode="L")

-        mask_image = process_mask(mask_pil, invert_output, mask_blur, mask_offset)
-        result_image = apply_background_color(img_pil, mask_image, background, background_color)
-        if background == "Color":
-            result_image = result_image.convert("RGB")
-        else:
-            result_image = result_image.convert("RGBA")
-        mask_tensor = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0)
-        mask_image_vis = mask_tensor.reshape((-1, 1, mask_image.height, mask_image.width)).movedim(1, -1).expand(-1, -1, -1, 3)
-        return (pil2tensor(result_image), mask_tensor, mask_image_vis)
+            # Process mask and apply background
+            mask_image = process_mask(mask_pil, invert_output, mask_blur, mask_offset)
+            result_image = apply_background_color(img_pil, mask_image, background, background_color)
+            if background == "Color":
+                result_image = result_image.convert("RGB")
+            else:
+                result_image = result_image.convert("RGBA")
+
+            # Convert to tensors
+            mask_tensor = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0)
+            mask_image_vis = mask_tensor.reshape((-1, 1, mask_image.height, mask_image.width)).movedim(1, -1).expand(-1, -1, -1, 3)
+
+            result_images.append(pil2tensor(result_image))
+            result_masks.append(mask_tensor)
+            result_mask_images.append(mask_image_vis)
+
+        # If no image was processed successfully, return empty results
+        if len(result_images) == 0:
+            width, height = tensor2pil(image[0]).size
+            empty_mask = torch.zeros((batch_size, 1, height, width), dtype=torch.float32, device="cpu")
+            empty_mask_rgb = empty_mask.reshape((-1, 1, height, width)).movedim(1, -1).expand(-1, -1, -1, 3)
+            return (image, empty_mask, empty_mask_rgb)
+
+        # Concatenate results from all batch items
+        return (torch.cat(result_images, dim=0),
+                torch.cat(result_masks, dim=0),
+                torch.cat(result_mask_images, dim=0))

 NODE_CLASS_MAPPINGS = {
     "SegmentV2": SegmentV2,
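
The bulk of the final hunk re-indents the former single-image pipeline into a per-item loop and concatenates the per-image results along the batch dimension, which is what lets the node accept batched IMAGE inputs. Below is a minimal sketch of that accumulate-then-concatenate pattern, with a dummy process_one standing in for the DINO + SAM steps; the [B, H, W, C] image / [B, H, W] mask layout is assumed here, following common ComfyUI conventions.

import torch

def process_one(img):
    # Stand-in for the per-image pipeline: returns an image tensor with a
    # leading batch dim of 1 and a matching 1 x H x W mask.
    h, w, c = img.shape
    return img.unsqueeze(0), torch.zeros((1, h, w))

batch = torch.rand(4, 64, 64, 3)  # pretend 4-image batch
result_images, result_masks = [], []
for b in range(batch.shape[0]):
    img_out, mask_out = process_one(batch[b])
    result_images.append(img_out)
    result_masks.append(mask_out)

out_images = torch.cat(result_images, dim=0)  # (4, 64, 64, 3)
out_masks = torch.cat(result_masks, dim=0)    # (4, 64, 64)

Note that torch.cat along dim 0 requires every per-item tensor to share the same spatial size, which holds within a single ComfyUI batch.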