"""Generate one image per prompt with a pretrained Infinity model.

The script loads the text tokenizer/encoder, the visual tokenizer (VAE) and
the Infinity transformer from the checkpoint paths configured below, then
calls ``gen_one_img`` once for every entry of ``prompts`` and writes each
result to ``outputs/re_<name>_test.jpg``.
"""
import argparse
import os
import os.path as osp
import random

import cv2
import numpy as np
import torch

# Imported by name (rather than ``from run_infinity import *``) so that every
# helper this script depends on is visible here; ``argparse`` in particular is
# imported directly above instead of being picked up through the star import.
from run_infinity import (
    dynamic_resolution_h_w,
    gen_one_img,
    h_div_w_templates,
    load_tokenizer,
    load_transformer,
    load_visual_tokenizer,
)

# Checkpoint locations.
model_path = '/workspace/Infinity/weights/infinity_2b_reg.pth'
vae_path = '/workspace/Infinity/weights/infinity_vae_d32reg.pth'
text_encoder_ckpt = '/workspace/Infinity/weights/flan-t5-xl'

# SET
# Settings object handed to load_visual_tokenizer() and load_transformer();
# a few fields (pn, cfg_insertion_layer, vae_type, sampling_per_bits) are
# also read directly by main() below.
args = argparse.Namespace(
    pn='1M',
    model_path=model_path,
    cfg_insertion_layer=0,
    vae_type=32,
    vae_path=vae_path,
    add_lvl_embeding_only_first_block=1,
    use_bit_label=1,
    model_type='infinity_2b',
    rope2d_each_sa_layer=1,
    rope2d_normalized_by_hw=2,
    use_scale_schedule_embedding=0,
    sampling_per_bits=1,
    text_encoder_ckpt=text_encoder_ckpt,
    text_channels=2048,
    apply_spatial_patchify=0,
    h_div_w_template=1.000,
    use_flex_attn=0,
    cache_dir='/dev/shm',
    checkpoint_type='torch',
    seed=0,
    bf16=1,
    save_file='tmp.jpg',
    enable_model_cache=0,
)

# PROMPT
# Maps a short name (used in the output file name) to the prompt text.
prompts = {
    "vintage_insect": "Insect made from vintage 1960s electronic components, capacitors, resistors, transistors, wires, diodes, solder, circuitboard.",
    "macro_closeup": "Denis Villeneuve's extreme macro cinematographic close-up in water.",
    "3d_school": "A creative 3D image to be placed at the bottom of a mobile application's homepage, depicting a miniature school and children carrying backpacks.",
    "explore_more": "Create an image with 'Explore More' in an adventurous font over a picturesque hiking trail.",
    "toy_car": "Close-up shot of a diecast toy car, diorama, night, lights from windows, bokeh, snow.",
    "fairy_house": "House: white; pink tinted windows; surrounded by flowers; cute; scenic; garden; fairy-like; epic; photography; photorealistic; insanely detailed and intricate; textures; grain; ultra-realistic.",
    "cat_fashion": "Hyperrealistic black and white photography of cats fashion show in style of Helmut Newton.",
    "spacefrog_astroduck": "Two superheroes called Spacefrog (a dashing green cartoon-like frog with a red cape) and Astroduck (a yellow fuzzy duck, part-robot, with blue/grey armor), near a garden pond, next to their spaceship, a classic flying saucer, called the Tadpole 3000. Photorealistic.",
    "miniature_village": "An enchanted miniature village bustling with activity, featuring tiny houses, markets, and residents.",
    "corgi_dog": "A close-up photograph of a Corgi dog. The dog is wearing a black hat and round, dark sunglasses. The Corgi has a joyful expression, with its mouth open and tongue sticking out, giving an impression of happiness or excitement.",
    "robot_eggplant": "a robot holding a huge eggplant, sunny nature background",
    "perfume_product": "Product photography, a perfume placed on a white marble table with pineapple, coconut, lime next to it as decoration, white curtains, full of intricate details, realistic, minimalist, layered gestures in a bright and concise atmosphere, minimalist style.",
    "mountain_landscape": "The image presents a picturesque mountainous landscape under a cloudy sky. The mountains, blanketed in lush greenery, rise majestically, their slopes dotted with clusters of trees and shrubs. The sky above is a canvas of blue, adorned with fluffy white clouds that add a sense of tranquility to the scene. In the foreground, a valley unfolds, nestled between the towering mountains. It appears to be a rural area, with a few buildings and structures visible, suggesting the presence of a small settlement. The buildings are scattered, blending harmoniously with the natural surroundings. The image is captured from a high vantage point, providing a sweeping view of the valley and the mountains.",
}

# OUTPUT
output_dir = "outputs"

# Sampling settings shared by every prompt.
CFG = 3                     # forwarded to gen_one_img as cfg_list
TAU = 0.5                   # forwarded to gen_one_img as tau_list
H_DIV_W = 1 / 1             # requested aspect ratio (height / width)
ENABLE_POSITIVE_PROMPT = 0  # forwarded to gen_one_img unchanged
MAX_SEED = 10000            # inclusive upper bound of the per-image seed


def build_scale_schedule(h_div_w, pn):
    """Return the scale schedule for the template closest to *h_div_w*.

    Args:
        h_div_w: Requested height/width ratio.
        pn: Key selecting an entry of ``dynamic_resolution_h_w[template]``
            (the script passes ``args.pn``).

    Returns:
        A new list of ``(1, h, w)`` tuples, one per entry of the selected
        ``'scales'`` sequence.
    """
    # Snap the requested ratio to the nearest ratio the lookup table knows.
    nearest = h_div_w_templates[np.argmin(np.abs(h_div_w_templates - h_div_w))]
    scales = dynamic_resolution_h_w[nearest][pn]['scales']
    # The first component of every entry is forced to 1.
    # NOTE(review): presumably a frame count for still images - confirm
    # against run_infinity.
    return [(1, h, w) for (_, h, w) in scales]


def save_image(image, path):
    """Write *image* to *path* with OpenCV.

    Args:
        image: Object returned by ``gen_one_img``; it must support
            ``.cpu().numpy()``.
            NOTE(review): assumed to already be in the layout/dtype that
            ``cv2.imwrite`` expects - confirm against ``gen_one_img``.
        path: Destination file path.

    Raises:
        OSError: If ``cv2.imwrite`` reports that nothing was written.
    """
    # cv2.imwrite signals failure through its return value, so it has to be
    # checked; otherwise a failed write would still be reported as saved.
    if not cv2.imwrite(path, image.cpu().numpy()):
        raise OSError(f"cv2.imwrite could not write {path}")


def main():
    """Load the models and render every prompt in ``prompts``."""
    torch.cuda.set_device(0)

    # LOAD
    text_tokenizer, text_encoder = load_tokenizer(t5_path=args.text_encoder_ckpt)
    vae = load_visual_tokenizer(args)
    infinity = load_transformer(vae, args)

    os.makedirs(output_dir, exist_ok=True)

    # The schedule depends only on constants, so it is computed once.
    scale_schedule = build_scale_schedule(H_DIV_W, args.pn)

    # GEN IMG
    for category, prompt in prompts.items():
        # A fresh seed per image; it is printed so a result can be reproduced.
        seed = random.randint(0, MAX_SEED)
        print(f"{category}: generating with seed {seed}")

        generated_image = gen_one_img(
            infinity,
            vae,
            text_tokenizer,
            text_encoder,
            prompt,
            g_seed=seed,
            gt_leak=0,
            gt_ls_Bl=None,
            cfg_list=CFG,
            tau_list=TAU,
            # Pass a copy so each call gets its own list object.
            scale_schedule=list(scale_schedule),
            cfg_insertion_layer=[args.cfg_insertion_layer],
            vae_type=args.vae_type,
            sampling_per_bits=args.sampling_per_bits,
            enable_positive_prompt=ENABLE_POSITIVE_PROMPT,
        )

        # SAVE
        save_path = osp.join(output_dir, f"re_{category}_test.jpg")
        save_image(generated_image, save_path)
        print(f"{category} image saved to {save_path}")


if __name__ == "__main__":
    main()
0 commit comments