gradio/gradio_infer_demo.py (9 changes: 4 additions & 5 deletions)

@@ -119,7 +119,7 @@ def update_task(hf_model_id: str) -> Tuple[gr.Dropdown, gr.Component]:
 def update_subcheckpoints(checkpoint_dir):
     """Get subdirectories for the selected checkpoint directory."""
     if checkpoint_dir == "None":
-        return gr.Dropdown(choices=[], interactive=False, visible=False)
+        return gr.Dropdown(choices=["None"], value="None", interactive=False, visible=False)
 
     # Get the full path to the checkpoint directory
     full_checkpoint_path = os.path.join(checkpoint_rootdir, checkpoint_dir)
@@ -138,7 +138,7 @@ def update_subcheckpoints(checkpoint_dir):
 
     if not subdirs:
         # If there are no subdirectories, hide the dropdown
-        return gr.Dropdown(choices=[], interactive=False, visible=False)
+        return gr.Dropdown(choices=["None"], value="None", interactive=False, visible=False)
 
     # Show dropdown with available subdirectories
     return gr.Dropdown(
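
Both update_subcheckpoints fallbacks above now return a hidden, non-interactive dropdown seeded with the "None" sentinel instead of an empty choices list, presumably so the component always carries a value that the rest of the demo (which compares against the string "None") can handle. A minimal sketch of the pattern; the helper name is hypothetical and only standard gr.Dropdown keyword arguments are used, this is not code from the PR:

import gradio as gr

def checkpoint_dropdown(subdirs: list[str]) -> gr.Dropdown:
    # Hypothetical helper mirroring the fallback above: with nothing to select,
    # return a hidden dropdown whose value is the "None" sentinel the demo
    # checks for, rather than an empty choice list with no current value.
    if not subdirs:
        return gr.Dropdown(choices=["None"], value="None", interactive=False, visible=False)
    return gr.Dropdown(choices=["None"] + subdirs, value="None", interactive=True, visible=True)
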
@@ -183,6 +183,7 @@ def load_model_and_generate(
     )
 
     # Load LoRA weights if selected
+    unload_lora_checkpoint(pipeline)
     if lora_checkpoint != "None":
         progress(0.3, desc="Loading LoRA weights...")
         # Construct the full path to the specific checkpoint
@@ -192,8 +193,6 @@ def load_model_and_generate(
             lora_path = lora_checkpoint
         logger.info(f"Loading LoRA weights from {lora_path}")
         load_lora_checkpoint(pipeline, lora_path)
-    else:
-        unload_lora_checkpoint(pipeline)
 
     # Generate content based on task
     progress(0.5, desc="Generating content...")
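
The unload call now runs unconditionally before the optional load, instead of only in the removed else branch, presumably so that an adapter loaded for a previous generation is always cleared before a new (or no) LoRA is applied. A sketch of the resulting control flow; the wrapper function is hypothetical and the import path of the two helpers is an assumption (the demo already has them in scope):

from cogkit.utils import load_lora_checkpoint, unload_lora_checkpoint  # assumed import path

def apply_lora_selection(pipeline, lora_checkpoint: str, lora_path: str) -> None:
    # Always drop whatever adapter a previous run may have left on the pipeline,
    # then load the newly selected checkpoint only if one was chosen.
    unload_lora_checkpoint(pipeline)
    if lora_checkpoint != "None":
        load_lora_checkpoint(pipeline, lora_path)
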
@@ -300,7 +299,7 @@ def load_model_and_generate(
            guidance_scale = gr.Slider(
                minimum=1.0,
                maximum=15.0,
-                value=6.0,
+                value=5.0,
                step=0.1,
                label="Guidance Scale",
                info="Higher values increase prompt adherence",

gradio/gradio_lora_demo.py (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,6 @@
 from torchvision.io import write_video
 from utils import (
     BaseTask,
-    flatten_dict,
     get_dataset_dirs,
     get_logger,
     get_lora_checkpoint_rootdir,
@@ -24,6 +23,7 @@
 
 import gradio as gr
 from cogkit import GenerationMode, guess_generation_mode
+from cogkit.utils import flatten_dict
 
 # ======================= global state ====================
 

gradio/utils/__init__.py (3 changes: 1 addition & 2 deletions)

@@ -8,7 +8,7 @@
     resolve_path,
 )
 from .logging import get_logger
-from .misc import flatten_dict, get_resolutions
+from .misc import get_resolutions
 from .task import BaseTask
 
 __all__ = [
@@ -22,5 +22,4 @@
     "resolve_path",
     "BaseTask",
     "get_resolutions",
-    "flatten_dict",
 ]

gradio/utils/misc.py (36 changes: 1 addition & 35 deletions)

@@ -1,9 +1,7 @@
-from typing import Any, Dict, List
-
 from cogkit import GenerationMode
 
 
-def get_resolutions(task: GenerationMode) -> List[str]:
+def get_resolutions(task: GenerationMode) -> list[str]:
     if task == GenerationMode.TextToImage:
         return [
             "512x512",
@@ -19,35 +17,3 @@ def get_resolutions(task: GenerationMode) -> List[str]:
             "49x480x720",
             "81x768x1360",
         ]
-
-
-def flatten_dict(d: Dict[str, Any], ignore_none: bool = False) -> Dict[str, Any]:
-    """
-    Flattens a nested dictionary into a single layer dictionary.
-
-    Args:
-        d: The dictionary to flatten
-        ignore_none: If True, keys with None values will be omitted
-
-    Returns:
-        A flattened dictionary
-
-    Raises:
-        ValueError: If there are duplicate keys across nested dictionaries
-    """
-    result = {}
-
-    def _flatten(current_dict, result_dict):
-        for key, value in current_dict.items():
-            if value is None and ignore_none:
-                continue
-
-            if isinstance(value, dict):
-                _flatten(value, result_dict)
-            else:
-                if key in result_dict:
-                    raise ValueError(f"Duplicate key '{key}' found in nested dictionary")
-                result_dict[key] = value
-
-    _flatten(d, result)
-    return result
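
flatten_dict is not dropped outright: as the gradio_lora_demo.py change above shows, it is now imported from cogkit.utils rather than from the local gradio/utils package. Assuming the relocated function keeps the behaviour of the removed local copy shown here, usage looks like this:

from cogkit.utils import flatten_dict  # import path taken from the gradio_lora_demo.py diff

nested = {"model": {"lr": 5e-05, "dropout": None}, "seed": 42}

# Nested dicts are collapsed into a single level; with ignore_none=True, keys with
# None values are skipped; duplicate keys across levels raise ValueError.
print(flatten_dict(nested, ignore_none=True))  # {'lr': 5e-05, 'seed': 42}
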

pyproject.toml (2 changes: 2 additions & 0 deletions)

@@ -33,6 +33,8 @@ finetune = [
     "datasets~=3.4",
     "deepspeed~=0.16.4",
     "av~=14.2.0",
+    "bitsandbytes~=0.45.4",
+    "tensorboard~=2.19",
 ]
 
 [project.urls]

quickstart/scripts/train.py (2 changes: 1 addition & 1 deletion)

@@ -7,7 +7,7 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--model_name", type=str, required=True)
     parser.add_argument("--training_type", type=str, required=True)
-    parser.add_argument("--enable_packing", action="store_true")
+    parser.add_argument("--enable_packing", type=lambda x: x.lower() == "true")
     args, unknown = parser.parse_known_args()
 
     trainer_cls = get_model_cls(args.model_name, args.training_type, args.enable_packing)
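
The quickstart scripts below pass explicit values such as --enable_packing false and --pin_memory true. With action="store_true" the flag takes no value, so --enable_packing false would still set the attribute to True and leave "false" in the extras returned by parse_known_args; the lambda instead converts the string, with anything other than "true" (case-insensitively) becoming False, and the default being None when the flag is omitted. An equivalent, slightly stricter stand-alone sketch (not the repository's code):

import argparse

def str2bool(value: str) -> bool:
    # Accept the lowercase booleans used by the quickstart shell scripts.
    if value.lower() in ("true", "1", "yes"):
        return True
    if value.lower() in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

parser = argparse.ArgumentParser()
parser.add_argument("--enable_packing", type=str2bool, default=False)
print(parser.parse_args(["--enable_packing", "false"]).enable_packing)  # False
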

quickstart/scripts/train_ddp_i2v.sh (14 changes: 7 additions & 7 deletions)

@@ -21,11 +21,6 @@ OUTPUT_ARGS=(
 # Data Configuration
 DATA_ARGS=(
     --data_root "/path/to/data"
-
-    # Note:
-    # for CogVideoX series models, number of training frames should be **8N+1**
-    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
-    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # Training Configuration
@@ -35,13 +30,18 @@ TRAIN_ARGS=(
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16" # ["no", "fp16"]
-    --learning_rate 2e-5
+    --learning_rate 5e-5
+
+    # Note:
+    # for CogVideoX series models, number of training frames should be **8N+1**
+    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
+    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # System Configuration
 SYSTEM_ARGS=(
     --num_workers 8
-    --pin_memory True
+    --pin_memory true
     --nccl_timeout 1800
 )
 
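
The frame-count note moves from DATA_ARGS into TRAIN_ARGS, next to the --train_resolution argument it describes: CogVideoX models expect 8N+1 training frames, CogVideoX1.5 models expect 16N+1, and the string is laid out as frames x height x width. A small stand-alone check of that rule (hypothetical helper, not part of the repository):

def check_video_resolution(res: str, frame_multiple: int = 8) -> None:
    # Validate a "frames x height x width" string such as "81x768x1360".
    frames, height, width = (int(part) for part in res.split("x"))
    if (frames - 1) % frame_multiple != 0:
        raise ValueError(f"{frames} frames is not of the form {frame_multiple}N+1")
    print(f"OK: {frames} frames at {height}x{width}")

check_video_resolution("81x768x1360", frame_multiple=8)   # CogVideoX: 8N+1 (81 = 8*10 + 1)
check_video_resolution("81x768x1360", frame_multiple=16)  # CogVideoX1.5: 16N+1 (81 = 16*5 + 1)
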

quickstart/scripts/train_ddp_t2i.sh (24 changes: 18 additions & 6 deletions)

@@ -21,26 +21,38 @@ OUTPUT_ARGS=(
 # Data Configuration
 DATA_ARGS=(
     --data_root "/path/to/data"
-
-    # Note:
-    # For CogView4 series models, height and width should be **32N** (multiple of 32)
-    --train_resolution "1024x1024" # (height x width)
 )
 
 # Training Configuration
 TRAIN_ARGS=(
     --seed 42 # random seed
     --train_epochs 1 # number of training epochs
     --batch_size 1
+
     --gradient_accumulation_steps 1
+
+    # Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
+    --train_resolution "1024x1024" # (height x width)
+
+    # When enable_packing is true, training will use the native image resolution
+    # (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
+    #
+    # IMPORTANT: When changing enable_packing from true to false (or vice versa),
+    # make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
+    --enable_packing false
+
     --mixed_precision "bf16" # ["no", "fp16"]
-    --learning_rate 2e-5
+    --learning_rate 5e-5
+
+    # enable --low_vram will slow down validation speed and enable quantization during training
+    # Note: --low_vram currently does not support multi-GPU training
+    --low_vram false
 )
 
 # System Configuration
 SYSTEM_ARGS=(
     --num_workers 8
-    --pin_memory True
+    --pin_memory true
     --nccl_timeout 1800
 )
 
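
Two of the notes added above are worth keeping in mind when editing this script: CogView4 training resolutions must be multiples of 32 in both dimensions, and toggling --enable_packing requires clearing the .cache directories under data_root/train and data_root/test. A hypothetical helper (not part of the repository) covering both:

import shutil
from pathlib import Path

def check_image_resolution(res: str) -> None:
    # Enforce the 32N rule for a "height x width" string such as "1024x1024".
    height, width = (int(part) for part in res.split("x"))
    if height % 32 or width % 32:
        raise ValueError(f"{res}: height and width must be multiples of 32")

def clear_packing_cache(data_root: str) -> None:
    # Remove the cache directories mentioned in the enable_packing note above.
    for split in ("train", "test"):
        cache_dir = Path(data_root) / split / ".cache"
        if cache_dir.is_dir():
            shutil.rmtree(cache_dir)

check_image_resolution("1024x1024")
clear_packing_cache("/path/to/data")
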

quickstart/scripts/train_ddp_t2v.sh (14 changes: 7 additions & 7 deletions)

@@ -20,11 +20,6 @@ OUTPUT_ARGS=(
 # Data Configuration
 DATA_ARGS=(
     --data_root "/path/to/data"
-
-    # Note:
-    # for CogVideoX series models, number of training frames should be **8N+1**
-    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
-    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # Training Configuration
@@ -34,13 +29,18 @@ TRAIN_ARGS=(
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
-    --learning_rate 2e-5
+    --learning_rate 5e-5
+
+    # Note:
+    # for CogVideoX series models, number of training frames should be **8N+1**
+    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
+    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # System Configuration
 SYSTEM_ARGS=(
     --num_workers 8
-    --pin_memory True
+    --pin_memory true
     --nccl_timeout 1800
 )
 

quickstart/scripts/train_zero_i2v.sh (15 changes: 8 additions & 7 deletions)

@@ -20,31 +20,32 @@ OUTPUT_ARGS=(
 # Data Configuration
 DATA_ARGS=(
     --data_root "/path/to/data"
-
-    # Note:
-    # for CogVideoX series models, number of training frames should be **8N+1**
-    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
-    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # Training Configuration
 TRAIN_ARGS=(
     --seed 42 # random seed
     --train_epochs 1 # number of training epochs
 
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     ######### Please keep consistent with deepspeed config file ##########
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
     ########################################################################
+
+    # Note:
+    # for CogVideoX series models, number of training frames should be **8N+1**
+    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
+    --train_resolution "81x768x1360" # (frames x height x width)
+
 )
 
 # System Configuration
 SYSTEM_ARGS=(
     --num_workers 8
-    --pin_memory True
+    --pin_memory true
     --nccl_timeout 1800
 )
 
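
The ZeRO scripts keep the reminder that --batch_size, --gradient_accumulation_steps and --mixed_precision must stay consistent with the DeepSpeed configuration passed to accelerate launch (../configs/accelerate_config.yaml in the launch command visible in the train_zero_t2i.sh diff below). A sketch of such a cross-check; the YAML key names are assumptions about a typical Accelerate + DeepSpeed config file, not taken from this repository:

import yaml

script_args = {"gradient_accumulation_steps": 1, "mixed_precision": "bf16"}

with open("../configs/accelerate_config.yaml") as f:
    cfg = yaml.safe_load(f)

ds_cfg = cfg.get("deepspeed_config", {})
assert cfg.get("mixed_precision") == script_args["mixed_precision"]
assert ds_cfg.get("gradient_accumulation_steps") == script_args["gradient_accumulation_steps"]
print("script arguments match the accelerate config")
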

quickstart/scripts/train_zero_t2i.sh (20 changes: 13 additions & 7 deletions)

@@ -20,31 +20,37 @@ OUTPUT_ARGS=(
 # Data Configuration
 DATA_ARGS=(
     --data_root "/path/to/data"
-
-    # Note:
-    # For CogView4 series models, height and width should be **32N** (multiple of 32)
-    --train_resolution "1024x1024" # (height x width)
 )
 
 # Training Configuration
 TRAIN_ARGS=(
     --seed 42 # random seed
     --train_epochs 1 # number of training epochs
 
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
+    # Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
+    --train_resolution "1024x1024" # (height x width)
+
     ######### Please keep consistent with deepspeed config file ##########
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
     ########################################################################
+
+    # When enable_packing is true, training will use the native image resolution
+    # (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
+    #
+    # IMPORTANT: When changing enable_packing from true to false (or vice versa),
+    # make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
+    --enable_packing false
 
 )
 
 # System Configuration
 SYSTEM_ARGS=(
     --num_workers 8
-    --pin_memory True
+    --pin_memory true
     --nccl_timeout 1800
 )
 
@@ -62,7 +68,7 @@ VALIDATION_ARGS=(
 )
 
 # Combine all arguments and launch training
-accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
+accelerate launch --config_file ../configs/accelerate_config.yaml train.py\
     "${MODEL_ARGS[@]}" \
     "${OUTPUT_ARGS[@]}" \
     "${DATA_ARGS[@]}" \

quickstart/scripts/train_zero_t2v.sh (14 changes: 7 additions & 7 deletions)

@@ -20,31 +20,31 @@ OUTPUT_ARGS=(
 # Data Configuration
 DATA_ARGS=(
     --data_root "/path/to/data"
-
-    # Note:
-    # for CogVideoX series models, number of training frames should be **8N+1**
-    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
-    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # Training Configuration
 TRAIN_ARGS=(
     --seed 42 # random seed
     --train_epochs 1 # number of training epochs
 
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     ######### Please keep consistent with deepspeed config file ##########
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
     ########################################################################
+
+    # Note:
+    # for CogVideoX series models, number of training frames should be **8N+1**
+    # for CogVideoX1.5 series models, number of training frames should be **16N+1**
+    --train_resolution "81x768x1360" # (frames x height x width)
 )
 
 # System Configuration
 SYSTEM_ARGS=(
     --num_workers 8
-    --pin_memory True
+    --pin_memory true
     --nccl_timeout 1800
 )
 