diff --git a/toolkit/config_modules.py b/toolkit/config_modules.py
index b6c0a3d77..6f13361f7 100644
--- a/toolkit/config_modules.py
+++ b/toolkit/config_modules.py
@@ -837,7 +837,9 @@ def __init__(self, **kwargs):
         self.cache_text_embeddings: bool = kwargs.get('cache_text_embeddings', False)
 
         self.standardize_images: bool = kwargs.get('standardize_images', False)
-
+        self.preserve_resolutions: bool = kwargs.get(
+            "preserve_resolutions", False
+        )
         # https://albumentations.ai/docs/api_reference/augmentations/transforms
         # augmentations are returned as a separate image and cannot currently be cached
         self.augmentations: List[dict] = kwargs.get('augmentations', None)
diff --git a/toolkit/dataloader_mixins.py b/toolkit/dataloader_mixins.py
index 69df9cbd7..cc1279a42 100644
--- a/toolkit/dataloader_mixins.py
+++ b/toolkit/dataloader_mixins.py
@@ -237,42 +237,52 @@ def setup_buckets(self: 'AiToolkitDataset', quiet=False):
                 file_item.crop_x = 0
                 file_item.crop_y = int(file_item.scale_to_height / 2 - resolution / 2)
             elif not did_process_poi:
-                bucket_resolution = get_bucket_for_image_size(
-                    width, height,
-                    resolution=resolution,
-                    divisibility=bucket_tolerance
-                )
+                if hasattr(self.dataset_config, 'preserve_resolutions') and self.dataset_config.preserve_resolutions:
+                    # Don't resize at all
+                    file_item.scale_to_width = width
+                    file_item.scale_to_height = height
+                    file_item.crop_width = width
+                    file_item.crop_height = height
+                    file_item.crop_x = 0
+                    file_item.crop_y = 0
+
+                else:
+                    bucket_resolution = get_bucket_for_image_size(
+                        width, height,
+                        resolution=resolution,
+                        divisibility=bucket_tolerance
+                    )
 
-                # Calculate scale factors for width and height
-                width_scale_factor = bucket_resolution["width"] / width
-                height_scale_factor = bucket_resolution["height"] / height
+                    # Calculate scale factors for width and height
+                    width_scale_factor = bucket_resolution["width"] / width
+                    height_scale_factor = bucket_resolution["height"] / height
 
-                # Use the maximum of the scale factors to ensure both dimensions are scaled above the bucket resolution
-                max_scale_factor = max(width_scale_factor, height_scale_factor)
+                    # Use the maximum of the scale factors to ensure both dimensions are scaled above the bucket resolution
+                    max_scale_factor = max(width_scale_factor, height_scale_factor)
 
-                # round up
-                file_item.scale_to_width = int(math.ceil(width * max_scale_factor))
-                file_item.scale_to_height = int(math.ceil(height * max_scale_factor))
+                    # round up
+                    file_item.scale_to_width = int(math.ceil(width * max_scale_factor))
+                    file_item.scale_to_height = int(math.ceil(height * max_scale_factor))
 
-                file_item.crop_height = bucket_resolution["height"]
-                file_item.crop_width = bucket_resolution["width"]
+                    file_item.crop_height = bucket_resolution["height"]
+                    file_item.crop_width = bucket_resolution["width"]
 
-                new_width = bucket_resolution["width"]
-                new_height = bucket_resolution["height"]
+                    new_width = bucket_resolution["width"]
+                    new_height = bucket_resolution["height"]
 
-                if self.dataset_config.random_crop:
-                    # random crop
-                    crop_x = random.randint(0, file_item.scale_to_width - new_width)
-                    crop_y = random.randint(0, file_item.scale_to_height - new_height)
-                    file_item.crop_x = crop_x
-                    file_item.crop_y = crop_y
-                else:
-                    # do central crop
-                    file_item.crop_x = int((file_item.scale_to_width - new_width) / 2)
-                    file_item.crop_y = int((file_item.scale_to_height - new_height) / 2)
+                    if self.dataset_config.random_crop:
+                        # random crop
+                        crop_x = random.randint(0, file_item.scale_to_width - new_width)
+                        crop_y = random.randint(0, file_item.scale_to_height - new_height)
+                        file_item.crop_x = crop_x
+                        file_item.crop_y = crop_y
+                    else:
+                        # do central crop
+                        file_item.crop_x = int((file_item.scale_to_width - new_width) / 2)
+                        file_item.crop_y = int((file_item.scale_to_height - new_height) / 2)
 
-                if file_item.crop_y < 0 or file_item.crop_x < 0:
-                    print_acc('debug')
+                    if file_item.crop_y < 0 or file_item.crop_x < 0:
+                        print_acc('debug')
 
             # check if bucket exists, if not, create it
             bucket_key = f'{file_item.crop_width}x{file_item.crop_height}'
@@ -289,7 +299,6 @@ def setup_buckets(self: 'AiToolkitDataset', quiet=False):
                 print_acc(f'{key}: {len(bucket.file_list_idx)} files')
             print_acc(f'{len(self.buckets)} buckets made')
 
-
 class CaptionProcessingDTOMixin:
     def __init__(self: 'FileItemDTO', *args, **kwargs):
         if hasattr(super(), '__init__'):
diff --git a/toolkit/print.py b/toolkit/print.py
index e0f6c23b0..baf054ae8 100644
--- a/toolkit/print.py
+++ b/toolkit/print.py
@@ -11,7 +11,7 @@ def print_acc(*args, **kwargs):
 class Logger:
     def __init__(self, filename):
         self.terminal = sys.stdout
-        self.log = open(filename, 'a')
+        self.log = open(filename, 'a', encoding='utf-8')
 
     def write(self, message):
         self.terminal.write(message)
diff --git a/ui/src/app/jobs/new/SimpleJob.tsx b/ui/src/app/jobs/new/SimpleJob.tsx
index 2d9d92689..8ba51f852 100644
--- a/ui/src/app/jobs/new/SimpleJob.tsx
+++ b/ui/src/app/jobs/new/SimpleJob.tsx
@@ -50,7 +50,6 @@ export default function SimpleJob({
      count += 1; // add quantization card
    }
    return count;
-
  }, [modelArch]);
 
  let topBarClass = 'grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 xl:grid-cols-4 gap-6';
@@ -78,7 +77,7 @@ export default function SimpleJob({
  let ARAs: SelectOption[] = [];
  if (modelArch.accuracyRecoveryAdapters) {
    for (const [label, value] of Object.entries(modelArch.accuracyRecoveryAdapters)) {
-      ARAs.push({ value, label });
+      ARAs.push({ value, label });
    }
  }
  if (ARAs.length > 0) {
@@ -270,14 +269,14 @@ export default function SimpleJob({
            />
            <NumberInput
-              label="Switch Every"
-              value={jobConfig.config.process[0].train.switch_boundary_every}
-              onChange={value => setJobConfig(value, 'config.process[0].train.switch_boundary_every')}
-              placeholder="eg. 1"
-              docKey={'train.switch_boundary_every'}
-              min={1}
-              required
-            />
+              label="Switch Every"
+              value={jobConfig.config.process[0].train.switch_boundary_every}
+              onChange={value => setJobConfig(value, 'config.process[0].train.switch_boundary_every')}
+              placeholder="eg. 1"
+              docKey={'train.switch_boundary_every'}
+              min={1}
+              required
+            />
        )}
@@ -638,6 +637,14 @@ export default function SimpleJob({
                  docKey="datasets.do_i2v"
                />
              )}
+              <Checkbox
+                label="Preserve Resolutions"
+                checked={dataset.preserve_resolutions || false}
+                onChange={value =>
+                  setJobConfig(value, `config.process[0].datasets[${i}].preserve_resolutions`)
+                }
+                docKey="datasets.preserve_resolutions"
+              />
diff --git a/ui/src/docs.tsx b/ui/src/docs.tsx
index 6b4144887..29afce337 100644
--- a/ui/src/docs.tsx
+++ b/ui/src/docs.tsx
@@ -80,6 +80,17 @@ const docs: { [key: string]: ConfigDoc } = {
      </>
    ),
  },
+  'datasets.preserve_resolutions': {
+    title: 'Preserve Resolutions',
+    description: (
+      <>
+        This disables any kind of resizing or bucketing and will train your images at their original resolutions.
+        <br />
+        <br />
+        Any specified resolution settings will be ignored.
+      </>
+    ),
+  },
  'datasets.do_i2v': {
    title: 'Do I2V',
    description: (
@@ -132,8 +143,8 @@ const docs: { [key: string]: ConfigDoc } = {
        Some models have multi stage networks that are trained and used separately in the denoising process. Most
        common, is to have 2 stages. One for high noise and one for low noise. You can choose to train both stages at
        once or train them separately. If trained at the same time, The trainer will alternate between training each
-        model every so many steps and will output 2 different LoRAs. If you choose to train only one stage, the
-        trainer will only train that stage and output a single LoRA.
+        model every so many steps and will output 2 different LoRAs. If you choose to train only one stage, the trainer
+        will only train that stage and output a single LoRA.
      </>
    ),
  },
diff --git a/ui/src/types.ts b/ui/src/types.ts
index 5034d4f96..61d00e565 100644
--- a/ui/src/types.ts
+++ b/ui/src/types.ts
@@ -87,6 +87,7 @@ export interface DatasetConfig {
  num_frames: number;
  shrink_video_to_frames: boolean;
  do_i2v: boolean;
+  preserve_resolutions: boolean;
  flip_x: boolean;
  flip_y: boolean;
 }
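A minimal usage sketch of the new flag, assuming the `__init__` patched in `toolkit/config_modules.py` belongs to the dataset config class (`DatasetConfig` here) and that `folder_path` is an accepted kwarg; only the `preserve_resolutions` default and the bucketing behavior come from the diff above, everything else is illustrative:

```python
# Sketch only: enable the new option on a dataset config.
from toolkit.config_modules import DatasetConfig  # assumed import path

dataset_config = DatasetConfig(
    folder_path='/path/to/images',   # illustrative kwarg, not part of this diff
    preserve_resolutions=True,       # new flag from this diff; defaults to False
)

# With preserve_resolutions=True, setup_buckets() skips get_bucket_for_image_size()
# and keeps each image at its native width/height, so bucket_key
# (f'{crop_width}x{crop_height}') effectively gives every distinct resolution
# its own bucket.
```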