Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions vace/annotators/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,61 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

class PlainImageAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, image):
return image

class PlainVideoAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, frames):
return frames

class PlainMaskAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, mask):
return mask

class PlainMaskAugInvertAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, mask):
return 255 - mask

class PlainMaskAugAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, mask):
return mask

class PlainMaskVideoAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, mask):
return mask

class PlainMaskAugVideoAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, masks):
return masks

class PlainMaskAugInvertVideoAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, masks):
return [255 - mask for mask in masks]

class ExpandMaskVideoAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, mask, expand_num):
return [mask] * expand_num

class PlainPromptAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, prompt):
return prompt
12 changes: 6 additions & 6 deletions vace/annotators/composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

class CompositionAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
self.process_types = ["repaint", "extension", "control"]
self.process_map = {
"repaint": "repaint",
Expand Down Expand Up @@ -44,7 +44,7 @@ def forward(self, process_type_1, process_type_2, frames_1, frames_2, masks_1, m


class ReferenceAnythingAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
from .subject import SubjectAnnotator
self.sbjref_ins = SubjectAnnotator(cfg['SUBJECT'] if 'SUBJECT' in cfg else cfg)
self.key_map = {
Expand Down Expand Up @@ -74,7 +74,7 @@ def forward(self, images, mode=None, return_mask=None, mask_cfg=None):


class AnimateAnythingAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
from .pose import PoseBodyFaceVideoAnnotator
self.pose_ins = PoseBodyFaceVideoAnnotator(cfg['POSE'])
self.ref_ins = ReferenceAnythingAnnotator(cfg['REFERENCE'])
Expand All @@ -91,7 +91,7 @@ def forward(self, frames=None, images=None, mode=None, return_mask=None, mask_cf


class SwapAnythingAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
from .inpainting import InpaintingVideoAnnotator
self.inp_ins = InpaintingVideoAnnotator(cfg['INPAINTING'])
self.ref_ins = ReferenceAnythingAnnotator(cfg['REFERENCE'])
Expand All @@ -110,7 +110,7 @@ def forward(self, video=None, frames=None, images=None, mode=None, mask=None, bb


class ExpandAnythingAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
from .outpainting import OutpaintingAnnotator
from .frameref import FrameRefExpandAnnotator
self.ref_ins = ReferenceAnythingAnnotator(cfg['REFERENCE'])
Expand All @@ -137,7 +137,7 @@ def forward(self, images=None, mode=None, return_mask=None, mask_cfg=None, direc


class MoveAnythingAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
from .layout import LayoutBboxAnnotator
self.layout_bbox_ins = LayoutBboxAnnotator(cfg['LAYOUTBBOX'])

Expand Down
4 changes: 2 additions & 2 deletions vace/annotators/depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class DepthAnnotator:
def __init__(self, cfg, device=None):
from .midas.api import MiDaSInference
pretrained_model = cfg['PRETRAINED_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.model = MiDaSInference(model_type='dpt_hybrid', model_path=pretrained_model).to(self.device)
self.a = cfg.get('A', np.pi * 2.0)
self.bg_th = cfg.get('BG_TH', 0.1)
Expand Down Expand Up @@ -53,7 +53,7 @@ class DepthV2Annotator:
def __init__(self, cfg, device=None):
from .depth_anything_v2.dpt import DepthAnythingV2
pretrained_model = cfg['PRETRAINED_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.model = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024]).to(self.device)
self.model.load_state_dict(
torch.load(
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/face.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, cfg, device=None):
self.return_dict = cfg.get('RETURN_DICT', False)
self.multi_face = cfg.get('MULTI_FACE', True)
pretrained_model = cfg['PRETRAINED_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.device_id = self.device.index if self.device.type == 'cuda' else None
ctx_id = self.device_id if self.device_id is not None else 0
self.model = FaceAnalysis(name=cfg.MODEL_NAME, root=pretrained_model, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, cfg, device=None):
}
params = argparse.Namespace(**params)
pretrained_model = cfg['PRETRAINED_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.model = RAFT(params)
self.model.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(pretrained_model, map_location="cpu", weights_only=True).items()})
self.model = self.model.to(self.device).eval()
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/gdino.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, cfg, device=None):
self.text_threshold = cfg.get('TEXT_THRESHOLD', 0.2)
self.iou_threshold = cfg.get('IOU_THRESHOLD', 0.5)
self.use_nms = cfg.get('USE_NMS', True)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.model = Model(model_config_path=grounding_dino_config_path,
model_checkpoint_path=grounding_dino_checkpoint_path,
device=self.device)
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/gray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


class GrayAnnotator:
def __init__(self, cfg):
def __init__(self, cfg, device=None):
pass
def forward(self, image):
image = convert_to_numpy(image)
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/pose.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class PoseAnnotator:
def __init__(self, cfg, device=None):
onnx_det = cfg['DETECTION_MODEL']
onnx_pose = cfg['POSE_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.pose_estimation = Wholebody(onnx_det, onnx_pose, device=self.device)
self.resize_size = cfg.get("RESIZE_SIZE", 1024)
self.use_body = cfg.get('USE_BODY', True)
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/prompt_extend.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, cfg, device=None):
self.model_name = cfg.get('MODEL_NAME', "Qwen2.5_3B")
self.is_vl = cfg.get('IS_VL', False)
self.system_prompt = cfg.get('SYSTEM_PROMPT', None)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.device_id = self.device.index if self.device.type == 'cuda' else None
rank = self.device_id if self.device_id is not None else 0
if self.mode == "dashscope":
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/ram.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, cfg, device=None):
ram_checkpoint_path = cfg['PRETRAINED_MODEL']
ram_type = cfg.get('RAM_TYPE', 'swin_l')
self.return_lang = cfg.get('RETURN_LANG', ['en']) # ['en', 'zh']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.model = ram_plus(pretrained=ram_checkpoint_path, image_size=image_size, vit=ram_type,
text_encoder_type=ram_tokenizer_path, delete_tag_index=delete_tag_index).eval().to(self.device)
self.ram_transform = Compose([
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/salient.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def __init__(self, cfg, device=None):
self.return_image = cfg.get('RETURN_IMAGE', False)
self.use_crop = cfg.get('USE_CROP', False)
pretrained_model = cfg['PRETRAINED_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.norm_mean = [0.485, 0.456, 0.406]
self.norm_std = [0.229, 0.224, 0.225]
self.norm_size = cfg.get('NORM_SIZE', [320, 320])
Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self, cfg, device=None):
self.task_type = cfg.get('TASK_TYPE', 'input_box')
self.return_mask = cfg.get('RETURN_MASK', False)
self.transform = ResizeLongestSide(1024)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
seg_model = sam_model_registry[cfg.get('MODEL_NAME', 'vit_b')](checkpoint=cfg['PRETRAINED_MODEL']).eval().to(self.device)
self.predictor = SamPredictor(seg_model)

Expand Down
2 changes: 1 addition & 1 deletion vace/annotators/scribble.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def __init__(self, cfg, device=None):
n_residual_blocks = cfg.get('N_RESIDUAL_BLOCKS', 3)
sigmoid = cfg.get('SIGMOID', True)
pretrained_model = cfg['PRETRAINED_MODEL']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else (torch.device(device) if isinstance(device, str) else device)
self.model = ContourInference(input_nc, output_nc, n_residual_blocks,
sigmoid)
self.model.load_state_dict(torch.load(pretrained_model, weights_only=True))
Expand Down
10 changes: 5 additions & 5 deletions vace/gradios/vace_preprocess_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def dict_to_markdown_table(d):


class VACEImageTag():
def __init__(self, cfg):
def __init__(self, cfg, device=None):
self.save_dir = os.path.join(cfg.save_dir, 'image')
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
Expand Down Expand Up @@ -320,7 +320,7 @@ def set_callbacks_image(self, **kwargs):


class VACEVideoTag():
def __init__(self, cfg):
def __init__(self, cfg, device=None):
self.save_dir = os.path.join(cfg.save_dir, 'video')
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
Expand Down Expand Up @@ -651,7 +651,7 @@ def set_callbacks_video(self, **kwargs):


class VACETagComposition():
def __init__(self, cfg):
def __init__(self, cfg, device=None):
self.save_dir = os.path.join(cfg.save_dir, 'composition')
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
Expand Down Expand Up @@ -805,7 +805,7 @@ def set_callbacks_composition(self, **kwargs):


class VACEVideoTool():
def __init__(self, cfg):
def __init__(self, cfg, device=None):
self.save_dir = os.path.join(cfg.save_dir, 'video_tool')
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
Expand Down Expand Up @@ -1031,7 +1031,7 @@ def set_callbacks_video_tool(self, **kwargs):

class VACETag():

def __init__(self, cfg):
def __init__(self, cfg, device=None):
self.cfg = cfg
self.save_dir = cfg.save_dir
self.current_index = 0
Expand Down
2 changes: 1 addition & 1 deletion vace/models/wan/wan_vace.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def __init__(
self.ring_size = ring_size
self.dynamic_load()

self.device = 'cpu' if torch.cuda.is_available() else 'cpu'
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.vid_proc = VaceVideoProcessor(
downsample=tuple([x * y for x, y in zip(config.vae_stride, config.patch_size)]),
min_area=720 * 1280,
Expand Down