from typing import Tuple, List

import ldm_patched.modules.model_management as model_management
from ldm_patched.modules.model_patcher import ModelPatcher
from modules.config import path_inpaint
from modules.model_loader import load_file_from_url

import numpy as np
import supervision as sv
import torch
from groundingdino.util.inference import Model, load_model, preprocess_caption, get_phrases_from_posmap


class GroundingDinoModel(Model):
    def __init__(self):
        # The checkpoint is downloaded and loaded lazily on the first call to
        # predict_with_caption; until then both devices default to CPU.
        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py'
        self.model = None
        self.load_device = torch.device('cpu')
        self.offload_device = torch.device('cpu')

    @torch.no_grad()
    @torch.inference_mode()
    def predict_with_caption(
            self,
            image: np.ndarray,
            caption: str,
            box_threshold: float = 0.35,
            text_threshold: float = 0.25
    ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
        if self.model is None:
            # Download the checkpoint on first use and wrap the model in a
            # ModelPatcher so model_management can move it between devices.
            filename = load_file_from_url(
                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
                file_name='groundingdino_swint_ogc.pth',
                model_dir=path_inpaint)
            model = load_model(model_config_path=self.config_file, model_checkpoint_path=filename)

            self.load_device = model_management.text_encoder_device()
            self.offload_device = model_management.text_encoder_offload_device()

            model.to(self.offload_device)

            self.model = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)

        # Ensure the (possibly offloaded) model is resident on the load device for this call.
        model_management.load_model_gpu(self.model)

        processed_image = GroundingDinoModel.preprocess_image(image_bgr=image).to(self.load_device)
        boxes, logits, phrases = predict(
            model=self.model,
            image=processed_image,
            caption=caption,
            box_threshold=box_threshold,
            text_threshold=text_threshold,
            device=self.load_device)
        source_h, source_w, _ = image.shape
        detections = GroundingDinoModel.post_process_result(
            source_h=source_h,
            source_w=source_w,
            boxes=boxes,
            logits=logits)
        return detections, boxes, logits, phrases


def predict(
        model,
        image: torch.Tensor,
        caption: str,
        box_threshold: float,
        text_threshold: float,
        device: str = "cuda"
) -> Tuple[torch.Tensor, torch.Tensor, List[str]]:
    caption = preprocess_caption(caption=caption)

    # local override of groundingdino.util.inference.predict:
    # unwrap the underlying module from the ModelPatcher before running it
    model = model.model.to(device)
    image = image.to(device)

    with torch.no_grad():
        outputs = model(image[None], captions=[caption])

    prediction_logits = outputs["pred_logits"].cpu().sigmoid()[0]  # prediction_logits.shape = (nq, 256)
    prediction_boxes = outputs["pred_boxes"].cpu()[0]  # prediction_boxes.shape = (nq, 4)

    # keep only queries whose best token score clears the box threshold
    mask = prediction_logits.max(dim=1)[0] > box_threshold
    logits = prediction_logits[mask]  # logits.shape = (n, 256)
    boxes = prediction_boxes[mask]  # boxes.shape = (n, 4)

    tokenizer = model.tokenizer
    tokenized = tokenizer(caption)

    phrases = [
        get_phrases_from_posmap(logit > text_threshold, tokenized, tokenizer).replace('.', '')
        for logit
        in logits
    ]

    return boxes, logits.max(dim=1)[0], phrases


default_groundingdino = GroundingDinoModel().predict_with_caption
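
# --- Usage sketch (illustrative only, not part of the module) ---
# A minimal example of how the exported callable might be invoked, assuming a
# BGR image loaded with OpenCV; the file name and caption below are made up.
#
#   import cv2
#
#   image_bgr = cv2.imread('example.png')        # H x W x 3, BGR uint8
#   detections, boxes, logits, phrases = default_groundingdino(
#       image=image_bgr,
#       caption='cat . dog',                     # GroundingDINO expects '.'-separated phrases
#       box_threshold=0.35,
#       text_threshold=0.25)
#   # detections: sv.Detections with absolute xyxy boxes; phrases: matched text per box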