
Commit 52fb0b4

make style
1 parent cc0c6e5 · commit 52fb0b4

File tree

examples/research_projects/anytext/auxiliary_latent_module.py
examples/research_projects/anytext/text_embedding_module.py

2 files changed: +40 −35 lines


examples/research_projects/anytext/auxiliary_latent_module.py

Lines changed: 23 additions & 16 deletions
@@ -196,10 +196,10 @@ def forward(
         np_hint = np.sum(pre_pos, axis=0).clip(0, 1)
         # prepare info dict
         info = {}
-        info['glyphs'] = []
-        info['gly_line'] = []
-        info['positions'] = []
-        info['n_lines'] = [len(texts)]*len(prompt)
+        info["glyphs"] = []
+        info["gly_line"] = []
+        info["positions"] = []
+        info["n_lines"] = [len(texts)] * len(prompt)
         for i in range(len(texts)):
             text = texts[i]
             if len(text) > max_chars:
@@ -209,40 +209,47 @@ def forward(
             gly_scale = 2
             if pre_pos[i].mean() != 0:
                 gly_line = self.draw_glyph(self.font, text)
-                glyphs = self.draw_glyph2(self.font, text, poly_list[i], scale=gly_scale, width=w, height=h, add_space=False)
+                glyphs = self.draw_glyph2(
+                    self.font, text, poly_list[i], scale=gly_scale, width=w, height=h, add_space=False
+                )
                 if revise_pos:
                     resize_gly = cv2.resize(glyphs, (pre_pos[i].shape[1], pre_pos[i].shape[0]))
-                    new_pos = cv2.morphologyEx((resize_gly*255).astype(np.uint8), cv2.MORPH_CLOSE, kernel=np.ones((resize_gly.shape[0]//10, resize_gly.shape[1]//10), dtype=np.uint8), iterations=1)
+                    new_pos = cv2.morphologyEx(
+                        (resize_gly * 255).astype(np.uint8),
+                        cv2.MORPH_CLOSE,
+                        kernel=np.ones((resize_gly.shape[0] // 10, resize_gly.shape[1] // 10), dtype=np.uint8),
+                        iterations=1,
+                    )
                     new_pos = new_pos[..., np.newaxis] if len(new_pos.shape) == 2 else new_pos
                     contours, _ = cv2.findContours(new_pos, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
                     if len(contours) != 1:
-                        str_warning = f'Fail to revise position {i} to bounding rect, remain position unchanged...'
+                        str_warning = f"Fail to revise position {i} to bounding rect, remain position unchanged..."
                         logger.warning(str_warning)
                     else:
                         rect = cv2.minAreaRect(contours[0])
                         poly = np.int0(cv2.boxPoints(rect))
-                        pre_pos[i] = cv2.drawContours(new_pos, [poly], -1, 255, -1) / 255.
+                        pre_pos[i] = cv2.drawContours(new_pos, [poly], -1, 255, -1) / 255.0
             else:
-                glyphs = np.zeros((h*gly_scale, w*gly_scale, 1))
+                glyphs = np.zeros((h * gly_scale, w * gly_scale, 1))
                 gly_line = np.zeros((80, 512, 1))
             pos = pre_pos[i]
-            info['glyphs'] += [self.arr2tensor(glyphs, len(prompt))]
-            info['gly_line'] += [self.arr2tensor(gly_line, len(prompt))]
-            info['positions'] += [self.arr2tensor(pos, len(prompt))]
+            info["glyphs"] += [self.arr2tensor(glyphs, len(prompt))]
+            info["gly_line"] += [self.arr2tensor(gly_line, len(prompt))]
+            info["positions"] += [self.arr2tensor(pos, len(prompt))]
         # get masked_x
-        masked_img = ((edit_image.astype(np.float32) / 127.5) - 1.0)*(1-np_hint)
+        masked_img = ((edit_image.astype(np.float32) / 127.5) - 1.0) * (1 - np_hint)
         masked_img = np.transpose(masked_img, (2, 0, 1))
         masked_img = torch.from_numpy(masked_img.copy()).float().to(self.device)
         if self.use_fp16:
             masked_img = masked_img.half()
         masked_x = self.encode_first_stage(masked_img[None, ...]).detach()
         if self.use_fp16:
             masked_x = masked_x.half()
-        info['masked_x'] = torch.cat([masked_x for _ in range(len(prompt))], dim=0)
+        info["masked_x"] = torch.cat([masked_x for _ in range(len(prompt))], dim=0)
         hint = self.arr2tensor(np_hint, len(prompt))
 
-        glyphs = torch.cat(info['glyphs'], dim=1).sum(dim=1, keepdim=True)
-        positions = torch.cat(info['positions'], dim=1).sum(dim=1, keepdim=True)
+        glyphs = torch.cat(info["glyphs"], dim=1).sum(dim=1, keepdim=True)
+        positions = torch.cat(info["positions"], dim=1).sum(dim=1, keepdim=True)
         enc_glyph = self.glyph_block(glyphs, emb, context)
         enc_pos = self.position_block(positions, emb, context)
         guided_hint = self.fuse_block(torch.cat([enc_glyph, enc_pos, masked_x], dim=1))
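
Aside from quote and spacing normalization, the re-wrapped morphologyEx call is the densest logic this hunk touches: it closes small gaps in the rendered glyph mask, then replaces the mask with its filled minimum-area rectangle. A minimal, self-contained sketch of that revision step on a synthetic mask, assuming only OpenCV and NumPy (the //10 kernel heuristic and shape handling follow the diff; the input mask and variable values are illustrative):

import cv2
import numpy as np

# Synthetic stand-in for the resized glyph rendering (values in [0, 1]).
resize_gly = np.zeros((80, 120), dtype=np.float32)
resize_gly[20:60, 30:90] = 1.0

# Morphological close merges nearby glyph pixels into one blob.
new_pos = cv2.morphologyEx(
    (resize_gly * 255).astype(np.uint8),
    cv2.MORPH_CLOSE,
    kernel=np.ones((resize_gly.shape[0] // 10, resize_gly.shape[1] // 10), dtype=np.uint8),
    iterations=1,
)
new_pos = new_pos[..., np.newaxis] if len(new_pos.shape) == 2 else new_pos

contours, _ = cv2.findContours(new_pos, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
if len(contours) == 1:
    # Replace the mask with the filled minimum-area bounding rectangle.
    rect = cv2.minAreaRect(contours[0])
    poly = cv2.boxPoints(rect).astype(np.intp)  # the diff uses np.int0, an alias dropped in NumPy 2.0
    revised = cv2.drawContours(new_pos, [poly], -1, 255, -1) / 255.0

Requiring exactly one contour before revising leaves multi-blob renderings untouched, which matches the warning branch in the diff ("remain position unchanged").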

examples/research_projects/anytext/text_embedding_module.py

Lines changed: 17 additions & 19 deletions
@@ -2,19 +2,13 @@
 # +> Token Replacement -> FrozenCLIPEmbedderT3
 # text -> tokenizer ->
 
-from typing import List, Optional
 
-import cv2
-import numpy as np
 import torch
-from easydict import EasyDict as edict
-from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageFont
 from torch import nn
 
-from diffusers.loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
 from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution
+from diffusers.utils import logging
 
 from .embedding_manager import EmbeddingManager
 from .frozen_clip_embedder_t3 import FrozenCLIPEmbedderT3
@@ -46,36 +40,40 @@ def __init__(self, font_path, device, use_fp16):
         rec_model_dir = "./ocr_weights/ppv3_rec.pth"
         self.text_predictor = create_predictor(rec_model_dir).eval()
         args = {}
-        args['rec_image_shape'] = "3, 48, 320"
-        args['rec_batch_num'] = 6
-        args['rec_char_dict_path'] = "./ocr_recog/ppocr_keys_v1.txt"
-        args['use_fp16'] = use_fp16
+        args["rec_image_shape"] = "3, 48, 320"
+        args["rec_batch_num"] = 6
+        args["rec_char_dict_path"] = "./ocr_recog/ppocr_keys_v1.txt"
+        args["use_fp16"] = use_fp16
         self.cn_recognizer = TextRecognizer(args, self.text_predictor)
         for param in self.text_predictor.parameters():
             param.requires_grad = False
         self.embedding_manager.recog = self.cn_recognizer
 
     @torch.no_grad()
     def forward(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, hint, n_prompt, text_info):
-        prompt_embeds = self.get_learned_conditioning({"c_concat": [hint], "c_crossattn": [[prompt] * len(prompt)], "text_info": text_info})
-        negative_prompt_embeds = self.get_learned_conditioning({"c_concat": [hint], "c_crossattn": [[n_prompt] * len(prompt)], "text_info": text_info})
+        prompt_embeds = self.get_learned_conditioning(
+            {"c_concat": [hint], "c_crossattn": [[prompt] * len(prompt)], "text_info": text_info}
+        )
+        negative_prompt_embeds = self.get_learned_conditioning(
+            {"c_concat": [hint], "c_crossattn": [[n_prompt] * len(prompt)], "text_info": text_info}
+        )
 
         return prompt_embeds, negative_prompt_embeds
 
     def get_learned_conditioning(self, c):
-        if hasattr(self.frozen_CLIP_embedder_t3, 'encode') and callable(self.frozen_CLIP_embedder_t3.encode):
-            if self.embedding_manager is not None and c['text_info'] is not None:
-                self.embedding_manager.encode_text(c['text_info'])
+        if hasattr(self.frozen_CLIP_embedder_t3, "encode") and callable(self.frozen_CLIP_embedder_t3.encode):
+            if self.embedding_manager is not None and c["text_info"] is not None:
+                self.embedding_manager.encode_text(c["text_info"])
             if isinstance(c, dict):
-                cond_txt = c['c_crossattn'][0]
+                cond_txt = c["c_crossattn"][0]
             else:
                 cond_txt = c
             if self.embedding_manager is not None:
                 cond_txt = self.frozen_CLIP_embedder_t3.encode(cond_txt, embedding_manager=self.embedding_manager)
             else:
                 cond_txt = self.frozen_CLIP_embedder_t3.encode(cond_txt)
             if isinstance(c, dict):
-                c['c_crossattn'][0] = cond_txt
+                c["c_crossattn"][0] = cond_txt
             else:
                 c = cond_txt
         if isinstance(c, DiagonalGaussianDistribution):
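
The re-wrapped get_learned_conditioning calls pass a dict with c_concat, c_crossattn, and text_info keys, while the method itself also accepts a bare prompt batch. A minimal sketch of that dispatch, using hypothetical DummyEmbedder / DummyManager stand-ins for FrozenCLIPEmbedderT3 and EmbeddingManager; only the control flow mirrors the diffed code, the shapes are illustrative:

import torch


class DummyEmbedder:
    # Hypothetical stand-in for FrozenCLIPEmbedderT3: one 77x768 embedding per prompt.
    def encode(self, texts, embedding_manager=None):
        return torch.zeros(len(texts), 77, 768)


class DummyManager:
    # Hypothetical stand-in for EmbeddingManager.
    def encode_text(self, text_info):
        pass  # the real manager injects glyph/OCR features here


def get_learned_conditioning(embedder, manager, c):
    if hasattr(embedder, "encode") and callable(embedder.encode):
        if manager is not None and c["text_info"] is not None:
            manager.encode_text(c["text_info"])
        # Dict inputs carry the prompt batch under c_crossattn; otherwise c is the batch.
        cond_txt = c["c_crossattn"][0] if isinstance(c, dict) else c
        cond_txt = embedder.encode(cond_txt, embedding_manager=manager)
        if isinstance(c, dict):
            c["c_crossattn"][0] = cond_txt
        else:
            c = cond_txt
    return c


c = {"c_concat": [None], "c_crossattn": [["a street sign"]], "text_info": None}
out = get_learned_conditioning(DummyEmbedder(), DummyManager(), c)
print(out["c_crossattn"][0].shape)  # torch.Size([1, 77, 768])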
