Merge pull request #62 from modelscope/v1.2.0_dev

maochaojie · web-flow · commit 4ec049289772 · 2024-11-07T20:30:51.000+08:00
update chatbot example
diff --git a/readme.md b/readme.md
@@ -18,7 +18,7 @@ SCEPTER offers 3 core components:
 
 
 ## 🎉 News
-- [🔥🔥🔥2024.10]: We are pleased to announce the release of the code for [ACE](https://arxiv.org/abs/2410.00086), supporting Customized Training / Comfy UI Workflow / gradio-based ChatBot Interface. The corresponding checkpoints are uploaded on [ModelScope](https://www.modelscope.cn/models/iic/ACE-0.6B-512px) and [HuggingFace](https://huggingface.co/scepter-studio/ACE-0.6B-512px). The detailed documents can be found at [ACE repo (soon)]().
+- [🔥🔥🔥2024.10]: We are pleased to announce the release of the code for [ACE](https://arxiv.org/abs/2410.00086), supporting Customized Training / ComfyUI Workflow / gradio-based ChatBot Interface. Detailed documentation can be found in the [ACE repo](https://github.com/ali-vilab/ACE.git).
 - [2024.10]: Support for inference and tuning with [FLUX](https://huggingface.co/black-forest-labs/FLUX.1-dev), as well as for building [ComfyUI](https://github.com/comfyanonymous/ComfyUI) workflows using this framework.
 - [2024.09]: We introduce **ACE**, an **A**ll-round **C**reator and **E**ditor adept at executing a diverse array of image editing tasks tailored to your specifications. Built upon the cutting-edge Diffusion Transformer architecture, ACE has been extensively trained on a comprehensive dataset to seamlessly interpret and execute any natural language instruction. For further information, please consult the [project page](https://ali-vilab.github.io/ace-page/).
 - [2024.07]: Support the inference and training of open-source generative models based on the [DiT](https://arxiv.org/abs/2212.09748) architecture, such as [SD3](https://arxiv.org/pdf/2403.03206) and [PixArt](https://arxiv.org/abs/2310.00426).
diff --git a/scepter/studio/chatbot/chatbot.py b/scepter/studio/chatbot/chatbot.py
@@ -391,9 +391,11 @@ def create_ui(self):
                        2. Enter '@' in the text box will exhibit all images in the gallery.
                        3. Select the image you wish to edit from the gallery, and its Image ID will be displayed in the text box.
                        4. Compose the editing instruction for the selected image, incorporating image id '@xxxxxx' into your instruction.
-                       For example, you might say, "Change the girl's skirt in @123456 to blue." The '@xxxxx' token will facilitate the identification of the specific image, and will be automatically replaced by a special token '{image}' in the instruction.
+                       For example, you might say, "Change the girl's skirt in @123456 to blue." The '@xxxxx' token will facilitate the identification of the specific image, and will be automatically replaced by a special token '{image}' in the instruction. Furthermore, it is also possible to engage in text-to-image generation without any initial image input.
                        5. Once your instructions are prepared, please click the "Chat" button to view the edited result in the chat window.
                        6. **Important** To render text on an image, please ensure to include a space between each letter. For instance, "add text 'g i r l' on the mask area of @xxxxx".
+                       7. To implement local editing based on a specified mask, simply click on the image within the chat window to access the image editor. Here, you can draw a mask and then click the 'Submit' button to upload the edited image along with the mask. For inpainting tasks, select the 'Composite' mask type, while for outpainting tasks, choose the 'Outpainting' mask type. For all other local editing tasks, please select the 'Background' mask type.
+                       8. If you find our work valuable, we invite you to refer to the [ACE Page](https://ali-vilab.github.io/ace-page/) for comprehensive information.
 
                     """
                     gr.Markdown(value=inst)
diff --git a/scepter/studio/chatbot/example.py b/scepter/studio/chatbot/example.py
@@ -14,6 +14,77 @@ def download_image(image, local_path=None):
 def get_examples(cache_dir):
     print('Downloading Examples ...')
     examples = [
+        [
+            'Facial Editing',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/e33edc106953.png?raw=true',
+                os.path.join(cache_dir, 'examples/e33edc106953.png')), None,
+            None, '{image} let the man smile', 6666
+        ],
+        [
+            'Facial Editing',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/5d2bcc91a3e9.png?raw=true',
+                os.path.join(cache_dir, 'examples/5d2bcc91a3e9.png')), None,
+            None, 'let the man in {image} wear sunglasses', 9999
+        ],
+        [
+            'Facial Editing',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/3a52eac708bd.png?raw=true',
+                os.path.join(cache_dir, 'examples/3a52eac708bd.png')), None,
+            None, '{image} red hair', 9999
+        ],
+        [
+            'Facial Editing',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/3f4dc464a0ea.png?raw=true',
+                os.path.join(cache_dir, 'examples/3f4dc464a0ea.png')), None,
+            None, '{image} let the man serious', 99999
+        ],
+        [
+            'Controllable Generation',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/131ca90fd2a9.png?raw=true',
+                os.path.join(cache_dir,
+                             'examples/131ca90fd2a9.png')), None, None,
+            '"A person sits contemplatively on the ground, surrounded by falling autumn leaves. Dressed in a green sweater and dark blue pants, they rest their chin on their hand, exuding a relaxed demeanor. Their stylish checkered slip-on shoes add a touch of flair, while a black purse lies in their lap. The backdrop of muted brown enhances the warm, cozy atmosphere of the scene." , generate the image that corresponds to the given scribble {image}.',
+            613725
+        ],
+        [
+            'Render Text',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/33e9f27c2c48.png?raw=true',
+                os.path.join(cache_dir, 'examples/33e9f27c2c48.png')),
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/33e9f27c2c48_mask.png?raw=true',
+                os.path.join(cache_dir,
+                             'examples/33e9f27c2c48_mask.png')), None,
+            'Put the text "C A T" at the position marked by mask in the {image}',
+            6666
+        ],
+        [
+            'Style Transfer',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/9e73e7eeef55.png?raw=true',
+                os.path.join(cache_dir, 'examples/9e73e7eeef55.png')), None,
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/2e02975293d6.png?raw=true',
+                os.path.join(cache_dir, 'examples/2e02975293d6.png')),
+            'edit {image} based on the style of {image1} ', 99999
+        ],
+        [
+            'Outpainting',
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/f2b22c08be3f.png?raw=true',
+                os.path.join(cache_dir, 'examples/f2b22c08be3f.png')),
+            download_image(
+                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/f2b22c08be3f_mask.png?raw=true',
+                os.path.join(cache_dir,
+                             'examples/f2b22c08be3f_mask.png')), None,
+            'Could the {image} be widened within the space designated by mask, while retaining the original?',
+            6666
+        ],
         [
             'Image Segmentation',
             download_image(
@@ -135,15 +206,6 @@ def get_examples(cache_dir):
             'creating a vivid image based on {image} and description "This image features a delicious rectangular tart with a flaky, golden-brown crust. The tart is topped with evenly sliced tomatoes, layered over a creamy cheese filling. Aromatic herbs are sprinkled on top, adding a touch of green and enhancing the visual appeal. The background includes a soft, textured fabric and scattered white flowers, creating an elegant and inviting presentation. Bright red tomatoes in the upper right corner hint at the fresh ingredients used in the dish."',
             6666
         ],
-        [
-            'Controllable Generation',
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/131ca90fd2a9.png?raw=true',
-                os.path.join(cache_dir,
-                             'examples/131ca90fd2a9.png')), None, None,
-            '"A person sits contemplatively on the ground, surrounded by falling autumn leaves. Dressed in a green sweater and dark blue pants, they rest their chin on their hand, exuding a relaxed demeanor. Their stylish checkered slip-on shoes add a touch of flair, while a black purse lies in their lap. The backdrop of muted brown enhances the warm, cozy atmosphere of the scene." , generate the image that corresponds to the given scribble {image}.',
-            613725
-        ],
         [
             'Image Denoising',
             download_image(
@@ -177,18 +239,6 @@ def get_examples(cache_dir):
             'Refashion the mask portion of {image} in accordance with "A yellow egg with a smiling face painted on it"',
             6666
         ],
-        [
-            'Outpainting',
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/f2b22c08be3f.png?raw=true',
-                os.path.join(cache_dir, 'examples/f2b22c08be3f.png')),
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/f2b22c08be3f_mask.png?raw=true',
-                os.path.join(cache_dir,
-                             'examples/f2b22c08be3f_mask.png')), None,
-            'Could the {image} be widened within the space designated by mask, while retaining the original?',
-            6666
-        ],
         [
             'General Editing',
             download_image(
@@ -230,18 +280,6 @@ def get_examples(cache_dir):
                 os.path.join(cache_dir, 'examples/92751f2e4a0e.png')), None,
             None, '{image} Remove the smile from his face', 9899999
         ],
-        [
-            'Render Text',
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/33e9f27c2c48.png?raw=true',
-                os.path.join(cache_dir, 'examples/33e9f27c2c48.png')),
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/33e9f27c2c48_mask.png?raw=true',
-                os.path.join(cache_dir,
-                             'examples/33e9f27c2c48_mask.png')), None,
-            'Put the text "C A T" at the position marked by mask in the {image}',
-            6666
-        ],
         [
             'Remove Text',
             download_image(
@@ -304,16 +342,6 @@ def get_examples(cache_dir):
                 os.path.join(cache_dir, 'examples/e0f48b3fd010.png')), None,
             None, 'make {image} to Walt Disney Animation style', 99999
         ],
-        [
-            'Style Transfer',
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/9e73e7eeef55.png?raw=true',
-                os.path.join(cache_dir, 'examples/9e73e7eeef55.png')), None,
-            download_image(
-                'https://github.com/ali-vilab/ace-page/blob/main/assets/examples/2e02975293d6.png?raw=true',
-                os.path.join(cache_dir, 'examples/2e02975293d6.png')),
-            'edit {image} based on the style of {image1} ', 99999
-        ],
         [
             'Try On',
             download_image(