Skip to content

Commit f2a2190

Browse files
committed
Sync branch 'mashb1t_main' with develop_upstream
2 parents 34f67c0 + 5a71495 commit f2a2190

40 files changed

+2819
-908
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
name: Bug Report
2+
description: Describe a problem
3+
title: "[Bug]: "
4+
labels: ["bug", "triage"]
5+
body:
6+
- type: markdown
7+
attributes:
8+
value: |
9+
Thank you for taking the time to fill out this bug report form!
10+
- type: checkboxes
11+
id: prerequisites
12+
attributes:
13+
label: Prerequisites
14+
description: Please make sure to troubleshoot yourself before continuing.
15+
options:
16+
- label: I have read the [Troubleshooting Guide](https://github.com/lllyasviel/Fooocus/blob/main/troubleshoot.md)
17+
required: true
18+
- label: I have checked that this is not a duplicate of an already existing [issue](https://github.com/lllyasviel/Fooocus/issues)
19+
required: true
20+
- type: textarea
21+
id: description
22+
attributes:
23+
label: Describe the problem
24+
description: Also tell us, what did you expect to happen?
25+
placeholder: "A clear and concise description of what the bug is."
26+
validations:
27+
required: true
28+
- type: textarea
29+
id: logs
30+
attributes:
31+
label: Full console log output
32+
description: Please copy and paste the **full** console log here. You will make our job easier if you give a **full** log. This will be automatically formatted into code, so no need for backticks.
33+
render: shell
34+
validations:
35+
required: true
36+
- type: textarea
37+
id: version
38+
attributes:
39+
label: Version
40+
description: What version of Fooocus are you using? (see browser tab title or console log)
41+
placeholder: "Example: Fooocus 2.1.855"
42+
validations:
43+
required: true
44+
- type: dropdown
45+
id: hosting
46+
attributes:
47+
label: Where are you running Fooocus?
48+
multiple: false
49+
options:
50+
- Locally
51+
- Locally with virtualisation (e.g. Docker)
52+
- Cloud (Gradio)
53+
- Cloud (other)
54+
validations:
55+
required: true
56+
- type: input
57+
id: operating-system
58+
attributes:
59+
label: Operating System
60+
description: What operating system are you using?
61+
placeholder: "Example: Windows 10"
62+
- type: dropdown
63+
id: browsers
64+
attributes:
65+
label: What browsers are you seeing the problem on?
66+
multiple: true
67+
options:
68+
- Chrome
69+
- Firefox
70+
- Microsoft Edge
71+
- Safari
72+
- other
73+
validations:
74+
required: true
75+
- type: markdown
76+
attributes:
77+
value: "Thank you for completing our form!"
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: Feature request
2+
description: Suggest an idea for this project
3+
title: "[Feature]: "
4+
labels: ["enhancement"]
5+
body:
6+
- type: markdown
7+
attributes:
8+
value: |
9+
Thank you for taking the time to fill out this feature request form!
10+
- type: checkboxes
11+
id: prerequisites
12+
attributes:
13+
label: Prerequisites
14+
options:
15+
- label: I have checked that this is not a duplicate of an already existing [feature request](https://github.com/lllyasviel/Fooocus/issues)
16+
required: true
17+
- type: textarea
18+
id: relation-to-problem
19+
attributes:
20+
label: Is your feature request related to a problem? Please describe.
21+
placeholder: "A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]."
23+
validations:
24+
required: true
25+
- type: textarea
26+
id: description
27+
attributes:
28+
label: Describe the idea you'd like
29+
placeholder: "A clear and concise description of what you want to happen."
30+
validations:
31+
required: true
32+
- type: markdown
33+
attributes:
34+
value: "Thank you for completing our form!"

.github/workflows/build_container.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
type=edge,branch=main
3939
4040
- name: Build and push Docker image
41-
uses: docker/build-push-action@v5
41+
uses: docker/build-push-action@v6
4242
with:
4343
context: .
4444
file: ./Dockerfile

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ __pycache__
1010
*.partial
1111
*.onnx
1212
sorted_styles.json
13+
hash_cache.txt
1314
/input
1415
/cache
1516
/language/default.json

args_manager.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@
2828
args_parser.parser.add_argument("--disable-preset-download", action='store_true',
2929
help="Disables downloading models for presets", default=False)
3030

31-
args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true',
32-
help="Disables automatic description of uov images when prompt is empty", default=False)
31+
args_parser.parser.add_argument("--enable-auto-describe-image", action='store_true',
32+
help="Enables automatic description of uov and enhance image when prompt is empty", default=False)
3333

3434
args_parser.parser.add_argument("--always-download-new-model", action='store_true',
35-
help="Always download newer models ", default=False)
35+
help="Always download newer models", default=False)
36+
37+
args_parser.parser.add_argument("--rebuild-hash-cache", help="Generates missing model and LoRA hashes.",
38+
type=int, nargs="?", metavar="CPU_NUM_THREADS", const=-1)
3639

3740
args_parser.parser.set_defaults(
3841
disable_cuda_malloc=True,

css/style.css

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ div:has(> #positive_prompt) {
9999
}
100100

101101
.advanced_check_row {
102-
width: 250px !important;
102+
width: 330px !important;
103103
}
104104

105105
.min_check {

experiments_mask_generation.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# https://github.com/sail-sg/EditAnything/blob/main/sam2groundingdino_edit.py
2+
3+
import numpy as np
4+
from PIL import Image
5+
6+
from extras.inpaint_mask import SAMOptions, generate_mask_from_image
7+
8+
# Read the sample picture. The context manager closes the file handle once the
# pixel data has been copied into the uint8 array.
with Image.open('cat.webp') as original_image:
    image = np.array(original_image, dtype=np.uint8)

# Options forwarded to generate_mask_from_image: the text prompt to detect,
# the two detection thresholds, and the SAM model variant to use.
sam_options = SAMOptions(
    dino_prompt='eye',
    dino_box_threshold=0.3,
    dino_text_threshold=0.25,
    dino_erode_or_dilate=0,
    dino_debug=False,
    max_detections=2,
    model_type='vit_b'
)

# Four values are returned; only the first (the mask) is used in this script.
mask_image, _, _, _ = generate_mask_from_image(image, sam_options=sam_options)

# Convert the mask array back into a PIL image and open it in the default viewer.
merged_masks_img = Image.fromarray(mask_image)
merged_masks_img.show()
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Flat module of configuration values for the "groundingdino" model.
# NOTE(review): the scraped page dropped this file's name; it is presumably
# extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py -- confirm.
# The meaning of each option is defined by the GroundingDINO library that
# consumes this file, not here; see its documentation before changing values.
batch_size = 1
modelname = "groundingdino"
backbone = "swin_T_224_1k"
position_embedding = "sine"
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
two_stage_type = "standard"
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = "relu"
dec_pred_bbox_embed_share = True
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 2000
max_text_len = 256
text_encoder_type = "bert-base-uncased"
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = True
use_transformer_ckpt = True
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from typing import Tuple, List
2+
3+
import ldm_patched.modules.model_management as model_management
4+
from ldm_patched.modules.model_patcher import ModelPatcher
5+
from modules.config import path_inpaint
6+
from modules.model_loader import load_file_from_url
7+
8+
import numpy as np
9+
import supervision as sv
10+
import torch
11+
from groundingdino.util.inference import Model
12+
from groundingdino.util.inference import load_model, preprocess_caption, get_phrases_from_posmap
13+
14+
15+
class GroundingDinoModel(Model):
    """GroundingDINO detector whose weights are loaded on first use.

    Construction only records the config path and placeholder devices. The
    checkpoint is fetched and wrapped in a ``ModelPatcher`` the first time
    :meth:`predict_with_caption` is called.
    """

    def __init__(self):
        # NOTE(review): Model.__init__ is not invoked here; presumably it would
        # load the checkpoint eagerly -- confirm against groundingdino.
        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py'
        self.model = None
        # Placeholders; replaced with the text-encoder devices on first load.
        self.load_device = torch.device('cpu')
        self.offload_device = torch.device('cpu')

    @torch.no_grad()
    @torch.inference_mode()
    def predict_with_caption(
            self,
            image: np.ndarray,
            caption: str,
            box_threshold: float = 0.35,
            text_threshold: float = 0.25
    ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
        """Detect regions of ``image`` that match ``caption``.

        Args:
            image: Image array; its shape is unpacked as (height, width, _).
                NOTE(review): it is forwarded as ``image_bgr`` -- confirm the
                channel order callers actually pass.
            caption: Text prompt describing what to detect.
            box_threshold: Minimum best-token score for a box to be kept.
            text_threshold: Per-token score cut-off used to build phrases.

        Returns:
            ``(detections, boxes, logits, phrases)``: the post-processed
            detections plus the raw boxes, scores and phrases from the
            module-level ``predict``.
        """
        if self.model is None:
            # First call: download (if needed) and load the checkpoint.
            filename = load_file_from_url(
                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
                file_name='groundingdino_swint_ogc.pth',
                model_dir=path_inpaint)
            model = load_model(model_config_path=self.config_file, model_checkpoint_path=filename)

            self.load_device = model_management.text_encoder_device()
            self.offload_device = model_management.text_encoder_offload_device()

            model.to(self.offload_device)

            self.model = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)

        # Requested on every call, not only on first load.
        model_management.load_model_gpu(self.model)

        processed_image = GroundingDinoModel.preprocess_image(image_bgr=image).to(self.load_device)
        # Uses this module's predict(), which unwraps the ModelPatcher.
        boxes, logits, phrases = predict(
            model=self.model,
            image=processed_image,
            caption=caption,
            box_threshold=box_threshold,
            text_threshold=text_threshold,
            device=self.load_device)
        source_h, source_w, _ = image.shape
        detections = GroundingDinoModel.post_process_result(
            source_h=source_h,
            source_w=source_w,
            boxes=boxes,
            logits=logits)
        return detections, boxes, logits, phrases
62+
63+
64+
def predict(
        model,
        image: torch.Tensor,
        caption: str,
        box_threshold: float,
        text_threshold: float,
        device: str = "cuda"
) -> Tuple[torch.Tensor, torch.Tensor, List[str]]:
    """Run one caption-conditioned detection pass and filter the results.

    Args:
        model: Wrapper exposing the underlying network as ``model.model``
            (a ``ModelPatcher`` in this file).
        image: Preprocessed image tensor; a batch dimension is added here.
        caption: Text prompt; normalised with ``preprocess_caption``.
        box_threshold: A query is kept when its highest token score exceeds
            this value.
        text_threshold: Token scores above this value contribute to the
            phrase extracted for a kept query.
        device: Device the network and image are moved to. The caller in this
            file passes its ``load_device`` rather than a string.

    Returns:
        ``(boxes, scores, phrases)``: ``boxes`` has shape (n, 4), ``scores``
        holds the highest token score of each kept query, and ``phrases`` has
        one string per kept query with ``'.'`` characters removed.
    """
    caption = preprocess_caption(caption=caption)

    # override to use model wrapped by patcher
    model = model.model.to(device)
    image = image.to(device)

    with torch.no_grad():
        outputs = model(image[None], captions=[caption])

    prediction_logits = outputs["pred_logits"].cpu().sigmoid()[0]  # prediction_logits.shape = (nq, 256)
    prediction_boxes = outputs["pred_boxes"].cpu()[0]  # prediction_boxes.shape = (nq, 4)

    # Keep only queries whose best token score clears the box threshold.
    mask = prediction_logits.max(dim=1)[0] > box_threshold
    logits = prediction_logits[mask]  # logits.shape = (n, 256)
    boxes = prediction_boxes[mask]  # boxes.shape = (n, 4)

    tokenizer = model.tokenizer
    tokenized = tokenizer(caption)

    phrases = [
        get_phrases_from_posmap(logit > text_threshold, tokenized, tokenizer).replace('.', '')
        for logit
        in logits
    ]

    return boxes, logits.max(dim=1)[0], phrases
98+
99+
100+
# Module-level entry point: the bound predict_with_caption method of a single
# GroundingDinoModel instance created at import time. The constructor only
# sets attributes (model is None), so no checkpoint is loaded here.
default_groundingdino = GroundingDinoModel().predict_with_caption

extras/censor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def censor(self, images: list | np.ndarray) -> list | np.ndarray:
4141
model_management.load_model_gpu(self.safety_checker_model)
4242

4343
single = False
44-
if not isinstance(images, list) or isinstance(images, np.ndarray):
44+
if not isinstance(images, (list, np.ndarray)):
4545
images = [images]
4646
single = True
4747

0 commit comments

Comments
 (0)