Skip to content

Commit fb900cc

Browse files
authored
Create demo.ipynb
1 parent d196d83 commit fb900cc

File tree

1 file changed

+201
-0
lines changed

1 file changed

+201
-0
lines changed
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 2,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"The autoreload extension is already loaded. To reload it, use:\n",
13+
" %reload_ext autoreload\n"
14+
]
15+
},
16+
{
17+
"name": "stderr",
18+
"output_type": "stream",
19+
"text": [
20+
"/root/miniconda/envs/densecaption/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
21+
" from .autonotebook import tqdm as notebook_tqdm\n"
22+
]
23+
}
24+
],
25+
"source": [
26+
"%load_ext autoreload\n",
27+
"%autoreload 2\n",
28+
"\n",
29+
"import torch\n",
30+
"from diffusers import StableDiffusionGLIGENTextImagePipeline, StableDiffusionGLIGENPipeline"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 7,
36+
"metadata": {},
37+
"outputs": [],
38+
"source": [
39+
"import os\n",
40+
"import diffusers\n",
41+
"from diffusers import (\n",
42+
" AutoencoderKL,\n",
43+
" DDPMScheduler,\n",
44+
" UNet2DConditionModel,\n",
45+
" UniPCMultistepScheduler,\n",
46+
" EulerDiscreteScheduler,\n",
47+
")\n",
48+
"from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer\n",
49+
"# Hub id of the base checkpoint: 'masterful/gligen-1-4-generation-text-box'.\n",
50+
"# Set GLIGEN_BASE_MODEL (hub id or local directory) to override the default local snapshot below.\n",
51+
"pretrained_model_name_or_path = os.environ.get('GLIGEN_BASE_MODEL', '/root/data/zhizhonghuang/checkpoints/models--masterful--gligen-1-4-generation-text-box/snapshots/d2820dc1e9ba6ca082051ce79cfd3eb468ae2c83')\n",
52+
"\n",
53+
"tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder=\"tokenizer\")\n",
54+
"noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder=\"scheduler\")\n",
55+
"text_encoder = CLIPTextModel.from_pretrained(\n",
56+
" pretrained_model_name_or_path, subfolder=\"text_encoder\"\n",
57+
")\n",
58+
"vae = AutoencoderKL.from_pretrained(\n",
59+
" pretrained_model_name_or_path, subfolder=\"vae\"\n",
60+
")\n",
61+
"# The UNet is deliberately not loaded from this snapshot's \"unet\" subfolder;\n",
62+
"# see the UNet2DConditionModel.from_pretrained call in the next cell.\n",
63+
"# Only the tokenizer, scheduler config, text encoder and VAE come from here.\n",
64+
"# Swap in an Euler sampler that reuses the scheduler config loaded above.\n",
65+
"noise_scheduler = EulerDiscreteScheduler.from_config(noise_scheduler.config)"
66+
]
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": 8,
71+
"metadata": {},
72+
"outputs": [],
73+
"source": [
74+
"# Load the UNet from its own checkpoint directory instead of the base snapshot's \"unet\" subfolder\n",
75+
"# (presumably a GLIGEN UNet retrained on COCO, judging by the directory name -- confirm).\n",
76+
"unet = UNet2DConditionModel.from_pretrained('/root/data/zhizhonghuang/ckpt/GLIGEN_Text_Retrain_COCO')"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": 9,
82+
"metadata": {},
83+
"outputs": [
84+
{
85+
"name": "stderr",
86+
"output_type": "stream",
87+
"text": [
88+
"You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion_gligen.pipeline_stable_diffusion_gligen.StableDiffusionGLIGENPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .\n"
89+
]
90+
}
91+
],
92+
"source": [
93+
"# Assemble the GLIGEN pipeline from the separately loaded components and move it to the GPU.\n",
94+
"pipe = StableDiffusionGLIGENPipeline(\n",
95+
" vae=vae,\n",
96+
" text_encoder=text_encoder,\n",
97+
" tokenizer=tokenizer,\n",
98+
" unet=unet,\n",
99+
" scheduler=noise_scheduler,\n",
100+
" safety_checker=None,\n",
101+
" feature_extractor=None,\n",
102+
").to(\"cuda\")"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": 10,
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"# prompt = 'A realistic image of landscape scene depicting a green car parking on the left of a blue truck, with a red air balloon and a bird in the sky'\n",
112+
"# gen_boxes = [('a green car', [21, 281, 211, 159]), ('a blue truck', [269, 283, 209, 160]), ('a red air balloon', [66, 8, 145, 135]), ('a bird', [296, 42, 143, 100])]\n",
113+
"\n",
114+
"# prompt = 'A realistic top-down view of a wooden table with two apples on it'\n",
115+
"# gen_boxes = [('a wooden table', [20, 148, 472, 216]), ('an apple', [150, 226, 100, 100]), ('an apple', [280, 226, 100, 100])]\n",
116+
"\n",
117+
"# prompt = 'A realistic scene of three skiers standing in a line on the snow near a palm tree'\n",
118+
"# gen_boxes = [('a skier', [5, 152, 139, 168]), ('a skier', [278, 192, 121, 158]), ('a skier', [148, 173, 124, 155]), ('a palm tree', [404, 105, 103, 251])]\n",
119+
"\n",
120+
"prompt = 'An oil painting of a pink dolphin jumping on the left of a steam boat on the sea'\n",
121+
"gen_boxes = [('a steam boat', [232, 225, 257, 149]), ('a jumping pink dolphin', [21, 249, 189, 123])]\n",
122+
"\n",
123+
"import numpy as np\n",
124+
"\n",
125+
"# Scale every box by 512 (presumably the canvas side in pixels -- confirm) to get fractional coordinates.\n",
126+
"boxes = np.array([box for _, box in gen_boxes]) / 512\n",
127+
"# Columns 2 and 3 become column 0 + column 2 and column 1 + column 3, i.e. [x0, y0, w, h] -> [x0, y0, x1, y1].\n",
128+
"boxes[:, 2:] += boxes[:, :2]\n",
129+
"boxes = boxes.tolist()\n",
130+
"gligen_phrases = [phrase for phrase, _ in gen_boxes]"
131+
]
132+
},
133+
{
134+
"cell_type": "code",
135+
"execution_count": 11,
136+
"metadata": {},
137+
"outputs": [
138+
{
139+
"name": "stderr",
140+
"output_type": "stream",
141+
"text": [
142+
"/root/miniconda/envs/densecaption/lib/python3.11/site-packages/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py:683: FutureWarning: Accessing config attribute `in_channels` directly via 'UNet2DConditionModel' object attribute is deprecated. Please access 'in_channels' over 'UNet2DConditionModel's config object instead, e.g. 'unet.config.in_channels'.\n",
143+
" num_channels_latents = self.unet.in_channels\n",
144+
"/root/miniconda/envs/densecaption/lib/python3.11/site-packages/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py:716: FutureWarning: Accessing config attribute `cross_attention_dim` directly via 'UNet2DConditionModel' object attribute is deprecated. Please access 'cross_attention_dim' over 'UNet2DConditionModel's config object instead, e.g. 'unet.config.cross_attention_dim'.\n",
145+
" max_objs, self.unet.cross_attention_dim, device=device, dtype=self.text_encoder.dtype\n",
146+
"100%|██████████| 50/50 [01:21<00:00, 1.64s/it]\n"
147+
]
148+
}
149+
],
150+
"source": [
151+
"images = pipe(\n",
152+
" prompt=prompt,\n",
153+
" negative_prompt=\"artifacts, blurry, smooth texture, bad quality, distortions, unrealistic, distorted image, bad proportions, duplicate\",\n",
154+
" gligen_phrases=gligen_phrases, gligen_boxes=boxes,\n",
155+
" gligen_scheduled_sampling_beta=1.0,\n",
156+
" num_inference_steps=50,\n",
157+
" num_images_per_prompt=16,\n",
158+
" output_type=\"pil\",\n",
159+
" generator=torch.Generator().manual_seed(0),  # fixed seed so Restart & Run All reproduces the same samples\n",
160+
").images"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": 12,
166+
"metadata": {},
167+
"outputs": [],
168+
"source": [
169+
"diffusers.utils.make_image_grid(images, rows=4, cols=len(images) // 4)"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {},
176+
"outputs": [],
177+
"source": []
178+
}
179+
],
180+
"metadata": {
181+
"kernelspec": {
182+
"display_name": "densecaption",
183+
"language": "python",
184+
"name": "python3"
185+
},
186+
"language_info": {
187+
"codemirror_mode": {
188+
"name": "ipython",
189+
"version": 3
190+
},
191+
"file_extension": ".py",
192+
"mimetype": "text/x-python",
193+
"name": "python",
194+
"nbconvert_exporter": "python",
195+
"pygments_lexer": "ipython3",
196+
"version": "3.11.9"
197+
}
198+
},
199+
"nbformat": 4,
200+
"nbformat_minor": 2
201+
}

0 commit comments

Comments
 (0)