From 18b35ea2d30354e300488bdf35c66da86f637bcb Mon Sep 17 00:00:00 2001 From: zjm <1746163329@qq.com> Date: Thu, 30 May 2024 15:50:46 +0800 Subject: [PATCH] ddpm --- .../conf/km_re1000_rs256_conditional.yaml | 64 ++ ...e1000_rs256_sparse_recons_conditional.yaml | 50 ++ examples/ddpm/denoise_step.py | 201 +++++++ examples/ddpm/functions.py | 557 ++++++++++++++++++ examples/ddpm/main.py | 117 ++++ examples/ddpm/utils.py | 223 +++++++ ppsci/arch/__init__.py | 3 + ppsci/arch/ddpm.py | 411 +++++++++++++ 8 files changed, 1626 insertions(+) create mode 100644 examples/ddpm/conf/km_re1000_rs256_conditional.yaml create mode 100644 examples/ddpm/conf/kmflow_re1000_rs256_sparse_recons_conditional.yaml create mode 100644 examples/ddpm/denoise_step.py create mode 100644 examples/ddpm/functions.py create mode 100644 examples/ddpm/main.py create mode 100644 examples/ddpm/utils.py create mode 100644 ppsci/arch/ddpm.py diff --git a/examples/ddpm/conf/km_re1000_rs256_conditional.yaml b/examples/ddpm/conf/km_re1000_rs256_conditional.yaml new file mode 100644 index 0000000000..a929e8cf6e --- /dev/null +++ b/examples/ddpm/conf/km_re1000_rs256_conditional.yaml @@ -0,0 +1,64 @@ +mode: train +train: + seed: 1234 + output_dir: 'experiments/km256/' + doc: 'weights/km256/' + timesteps: 1000 + log_path: '' + verbose: 'info' + ni: false + comment: '' + resume_training: false +data: + dataset: "kolmogorov flow" + data_dir: "/home/aistudio/data/data264003/train_data.npy" + stat_path: "/home/aistudio/data/data264003/km256_stats.npz" + image_size: 256 + channels: 3 + logit_transform: false + uniform_dequantization: false + gaussian_dequantization: false + random_flip: false + rescaled: false + num_workers: 0 + +model: + type: "conditional" + in_channels: 3 + out_ch: 3 + ch: 64 + ch_mult: [1, 1, 1, 2] + num_res_blocks: 1 + attn_resolutions: [16, ] + dropout: 0.1 + var_type: fixedlarge + ema_rate: 0.9999 + ema: True + resamp_with_conv: True + ckpt_path: '/home/aistudio/data/data264003/init_ckpt.pdparams' + +diffusion: + beta_schedule: linear + beta_start: 0.0001 + beta_end: 0.02 + num_diffusion_timesteps: 1000 # Might need to be changed to 500 later to match SDEdit + +training: + batch_size: 1 + n_epochs: 20 # 300 epoch for about 12 hours + n_iters: 200000 + snapshot_freq: 20000 + validation_freq: 2000 + +sampling: + batch_size: 32 + last_only: True + +optim: + weight_decay: 0.000 + optimizer: "Adam" + lr: 0.0002 + beta1: 0.9 + amsgrad: false + eps: 0.00000001 + grad_clip: 1.0 diff --git a/examples/ddpm/conf/kmflow_re1000_rs256_sparse_recons_conditional.yaml b/examples/ddpm/conf/kmflow_re1000_rs256_sparse_recons_conditional.yaml new file mode 100644 index 0000000000..4ddc0f3ee3 --- /dev/null +++ b/examples/ddpm/conf/kmflow_re1000_rs256_sparse_recons_conditional.yaml @@ -0,0 +1,50 @@ +log_dir: "./experiments/kmflow_re1000_rs256_ddim_recons_conditional_log" +mode: eval +place: 'cuda' +data: + dataset: "kmflow" + category: "kmflow" + image_size: 256 + channels: 3 + num_workers: 0 + data_dir: "/home/aistudio/data/data264003/kf_2d_re1000_256_40seed.npy" + sample_data_dir: "/home/aistudio/data/data264003/kmflow_sampled_data_irregnew.npz" + stat_path: "/home/aistudio/data/data264003/km256_stats.npz" + data_kw: 'u3232' + smoothing: True + smoothing_scale: 7 + +model: + type: "conditional" + in_channels: 3 + out_ch: 3 + ch: 64 + ch_mult: [ 1, 1, 1, 2 ] + num_res_blocks: 1 + attn_resolutions: [ 16, ] + dropout: 0.0 + var_type: fixedlarge + ema_rate: 0.9999 + ema: True + resamp_with_conv: True + ckpt_path: "/home/aistudio/data/data264003/latest.pdparams" + +diffusion: + beta_schedule: linear + beta_start: 0.0001 + beta_end: 0.02 + num_diffusion_timesteps: 1000 +eval: + repeat_run: 1 + sample_step: 1 + t: 240 + reverse_steps: 30 + seed: 1234 + + +sampling: + batch_size: 20 + last_only: True + guidance_weight: 0. + log_loss: True + dump_arr: True diff --git a/examples/ddpm/denoise_step.py b/examples/ddpm/denoise_step.py new file mode 100644 index 0000000000..a03c9cdd7b --- /dev/null +++ b/examples/ddpm/denoise_step.py @@ -0,0 +1,201 @@ +import paddle + + +def compute_alpha(beta, t): + beta = paddle.concat(x=[paddle.zeros(shape=[1]).to("gpu"), beta], axis=0) + beta_sub = 1 - beta + + # 2. 计算累积乘积 + cumprod_beta = paddle.cumprod(beta_sub, dim=0) + + # 3. 根据t + 1的值选择特定的元素 + # 注意:Paddle中使用gather函数来实现类似的索引选择功能 + # 假设t是一个标量,我们需要将其转换为Tensor,如果t已经是Tensor则不需要转换 + if not isinstance(t, paddle.Tensor): + t = paddle.to_tensor(t, dtype="int64") + selected = paddle.gather(cumprod_beta, index=t + 1, axis=0) + + # 4. 改变张量的形状 + # PaddlePaddle使用reshape函数来改变形状 + a = paddle.reshape(selected, shape=[-1, 1, 1, 1]) + # """Class Method: *.view, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually""" + # >>>>>> a = (1 - beta).cumprod(dim=0).index_select(axis=0, index=t + 1).view(-1, + # 1, 1, 1) + return a + + +def ddim_steps(x, seq, model, b, **kwargs): + n = x.shape[0] + seq_next = [-1] + list(seq[:-1]) + x0_preds = [] + xs = [x] + dx_func = kwargs.get("dx_func", None) + clamp_func = kwargs.get("clamp_func", None) + cache = kwargs.get("cache", False) + logger = kwargs.get("logger", None) + if logger is not None: + logger.update(x=xs[-1]) + for i, j in zip(reversed(seq), reversed(seq_next)): + with paddle.no_grad(): + t = (paddle.ones(shape=n) * i).to(x.place) + next_t = (paddle.ones(shape=n) * j).to(x.place) + at = compute_alpha(b, t.astype(dtype="int64")) + at_next = compute_alpha(b, next_t.astype(dtype="int64")) + xt = xs[-1].to("cuda") + et = model(xt, t) + x0_t = (xt - et * (1 - at).sqrt()) / at.sqrt() + x0_preds.append(x0_t.to("cpu")) + c2 = (1 - at_next).sqrt() + if dx_func is not None: + dx = dx_func(xt) + else: + dx = 0 + with paddle.no_grad(): + xt_next = at_next.sqrt() * x0_t + c2 * et - dx + if clamp_func is not None: + xt_next = clamp_func(xt_next) + xs.append(xt_next.to("cpu")) + if logger is not None: + logger.update(x=xs[-1]) + if not cache: + xs = xs[-1:] + x0_preds = x0_preds[-1:] + return xs, x0_preds + + +def ddpm_steps(x, seq, model, b, **kwargs): + n = x.shape[0] + seq_next = [-1] + list(seq[:-1]) + xs = [x] + x0_preds = [] + betas = b + dx_func = kwargs.get("dx_func", None) + cache = kwargs.get("cache", False) + clamp_func = kwargs.get("clamp_func", None) + for i, j in zip(reversed(seq), reversed(seq_next)): + with paddle.no_grad(): + t = (paddle.ones(shape=n) * i).to(x.place) + next_t = (paddle.ones(shape=n) * j).to(x.place) + at = compute_alpha(betas, t.astype(dtype="int64")) + atm1 = compute_alpha(betas, next_t.astype(dtype="int64")) + beta_t = 1 - at / atm1 + x = xs[-1].to("cuda") + output = model(x, t.astype(dtype="float32")) + e = output + x0_from_e = (1.0 / at).sqrt() * x - (1.0 / at - 1).sqrt() * e + x0_from_e = paddle.clip(x=x0_from_e, min=-1, max=1) + x0_preds.append(x0_from_e.to("cpu")) + mean_eps = ( + atm1.sqrt() * beta_t * x0_from_e + (1 - beta_t).sqrt() * (1 - atm1) * x + ) / (1.0 - at) + mean = mean_eps + noise = paddle.randn(shape=x.shape, dtype=x.dtype) + mask = 1 - (t == 0).astype(dtype="float32") + """Class Method: *.view, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually""" + # >>>>>> mask = mask.view(-1, 1, 1, 1) + mask = paddle.reshape(mask, shape=[-1, 1, 1, 1]) + logvar = beta_t.log() + if dx_func is not None: + dx = dx_func(x) + else: + dx = 0 + with paddle.no_grad(): + sample = mean + mask * paddle.exp(x=0.5 * logvar) * noise - dx + if clamp_func is not None: + sample = clamp_func(sample) + xs.append(sample.to("cpu")) + if not cache: + xs = xs[-1:] + x0_preds = x0_preds[-1:] + return xs, x0_preds + + +def guided_ddpm_steps(x, seq, model, b, **kwargs): + n = x.shape[0] + seq_next = [-1] + list(seq[:-1]) + xs = [x] + x0_preds = [] + betas = b + dx_func = kwargs.get("dx_func", None) + if dx_func is None: + raise ValueError("dx_func is required for guided denoising") + clamp_func = kwargs.get("clamp_func", None) + cache = kwargs.get("cache", False) + w = kwargs.get("w", 3.0) + for i, j in zip(reversed(seq), reversed(seq_next)): + with paddle.no_grad(): + t = (paddle.ones(shape=n) * i).to(x.place) + next_t = (paddle.ones(shape=n) * j).to(x.place) + at = compute_alpha(betas, t.astype(dtype="int64")) + atm1 = compute_alpha(betas, next_t.astype(dtype="int64")) + beta_t = 1 - at / atm1 + x = xs[-1].to("cuda") + dx = dx_func(x) + with paddle.no_grad(): + output = (w + 1) * model(x, t.astype(dtype="float32"), dx) - w * model( + x, t.astype(dtype="float32") + ) + e = output + x0_from_e = (1.0 / at).sqrt() * x - (1.0 / at - 1).sqrt() * e + x0_from_e = paddle.clip(x=x0_from_e, min=-1, max=1) + x0_preds.append(x0_from_e.to("cpu")) + mean_eps = ( + atm1.sqrt() * beta_t * x0_from_e + (1 - beta_t).sqrt() * (1 - atm1) * x + ) / (1.0 - at) + mean = mean_eps + noise = paddle.randn(shape=x.shape, dtype=x.dtype) + mask = 1 - (t == 0).astype(dtype="float32") + """Class Method: *.view, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually""" + # >>>>>> mask = mask.view(-1, 1, 1, 1) + mask = paddle.reshape(mask, shape=[-1, 1, 1, 1]) + logvar = beta_t.log() + with paddle.no_grad(): + sample = mean + mask * paddle.exp(x=0.5 * logvar) * noise - dx + if clamp_func is not None: + sample = clamp_func(sample) + xs.append(sample.to("cpu")) + if not cache: + xs = xs[-1:] + x0_preds = x0_preds[-1:] + return xs, x0_preds + + +def guided_ddim_steps(x, seq, model, b, **kwargs): + n = x.shape[0] + seq_next = [-1] + list(seq[:-1]) + x0_preds = [] + xs = [x] + dx_func = kwargs.get("dx_func", None) + if dx_func is None: + raise ValueError("dx_func is required for guided denoising") + clamp_func = kwargs.get("clamp_func", None) + cache = kwargs.get("cache", False) + w = kwargs.get("w", 3.0) + logger = kwargs.get("logger", None) + if logger is not None: + xs[-1] = paddle.to_tensor(xs[-1], place=paddle.CUDAPlace(0)) # 将张量转移到 CUDA 设备上 + logger.update(x=xs[-1]) + for i, j in zip(reversed(seq), reversed(seq_next)): + with paddle.no_grad(): + t = (paddle.ones(shape=n) * i).to("gpu") + next_t = (paddle.ones(shape=n) * j).to("gpu") + at = compute_alpha(b, t.astype(dtype="int64")) + at_next = compute_alpha(b, next_t.astype(dtype="int64")) + xt = xs[-1].to("gpu") + dx = dx_func(xt) + with paddle.no_grad(): + et = (w + 1) * model(xt, t, dx) - w * model(xt, t) + x0_t = (xt - et * (1 - at).sqrt()) / at.sqrt() + x0_preds.append(x0_t.to("cpu")) + c2 = (1 - at_next).sqrt() + with paddle.no_grad(): + xt_next = at_next.sqrt() * x0_t + c2 * et - dx + if clamp_func is not None: + xt_next = clamp_func(xt_next) + xs.append(xt_next.to("cpu")) + if logger is not None: + logger.update(x=xs[-1]) + if not cache: + xs = xs[-1:] + x0_preds = x0_preds[-1:] + return xs, x0_preds diff --git a/examples/ddpm/functions.py b/examples/ddpm/functions.py new file mode 100644 index 0000000000..539b73b91d --- /dev/null +++ b/examples/ddpm/functions.py @@ -0,0 +1,557 @@ +import logging +import os +import pickle +import sys + +import matplotlib.pyplot as plt +import numpy as np +import paddle +from denoise_step import ddim_steps +from denoise_step import guided_ddim_steps +from mpl_toolkits.axes_grid1 import ImageGrid +from PIL import Image + +sys.path.append("home/aistudio/work/PaddleScience/ppsci/arch") +sys.path.append("/home/aistudio/external-libraries") + + +def normalize_array(x): + x_min = np.amin(x) + x_max = np.amax(x) + y = (x - x_min) / (x_max - x_min) + return y, x_min, x_max + + +def unnormalize_array(y, x_min, x_max): + return y * (x_max - x_min) + x_min + + +def data_blurring(fno_data_sample): + ds_size = 16 + resample_method = Image.NEAREST + x_array, x_min, x_max = normalize_array(fno_data_sample.numpy()) + im = Image.fromarray((x_array * 255).astype(np.uint8)) + im_ds = im.resize((ds_size, ds_size)) + im_us = im_ds.resize((im.width, im.height), resample=resample_method) + x_array_blur = np.asarray(im_us) + x_array_blur = x_array_blur.astype(np.float32) / 255.0 + x_array_blur = unnormalize_array(x_array_blur, x_min, x_max) + return paddle.to_tensor(data=x_array_blur) + + +class MetricLogger(object): + def __init__(self, metric_fn_dict): + self.metric_fn_dict = metric_fn_dict + self.metric_dict = {} + self.reset() + + def reset(self): + for key in self.metric_fn_dict.keys(): + self.metric_dict[key] = [] + + @paddle.no_grad() + def update(self, **kwargs): + for key in self.metric_fn_dict.keys(): + self.metric_dict[key].append(self.metric_fn_dict[key](**kwargs)) + + def get(self): + return self.metric_dict.copy() + + def log(self, outdir, postfix=""): + # 准备一个新字典,用于存储可以被pickle序列化的对象 + serializable_metric_dict = {} + + def to_serializable(value): + """将值转换为可序列化的格式""" + if isinstance(value, paddle.Tensor): + # 将Tensor转换为NumPy数组 + return value.numpy() + elif isinstance(value, dict): + # 递归处理字典中的值 + return {k: to_serializable(v) for k, v in value.items()} + elif isinstance(value, list): + # 递归处理列表中的值 + return [to_serializable(v) for v in value] + else: + # 其他类型的值直接返回 + return value + + # 遍历metric_dict并转换所有值 + for key, value in self.metric_dict.items(): + serializable_metric_dict[key] = to_serializable(value) + + # 使用修改后的字典进行序列化 + with open(os.path.join(outdir, f"metric_log_{postfix}.pkl"), "wb") as f: + pickle.dump(serializable_metric_dict, f) + + +def get_beta_schedule(*, beta_start, beta_end, num_diffusion_timesteps): + betas = np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64) + assert betas.shape == (num_diffusion_timesteps,) + return betas + + +def load_flow_data(path, stat_path=None): + data = np.load(path) + print("Original data shape:", data.shape) + data_mean, data_scale = np.mean(data[:-4]), np.std(data[:-4]) + print(f"Data range: mean: {data_mean} scale: {data_scale}") + data = data[-4:, (...)].copy().astype(np.float32) + data = paddle.to_tensor(data=data, dtype="float32") + flattened_data = [] + for i in range(data.shape[0]): + for j in range(data.shape[1] - 2): + flattened_data.append(data[(i), j : j + 3, (...)]) + flattened_data = paddle.stack(x=flattened_data, axis=0) + print(f"data shape: {flattened_data.shape}") + return flattened_data, data_mean.item(), data_scale.item() + + +def create_gaussian_kernel(kernel_size, sigma): + # 创建一个二维高斯核 + ax = paddle.arange(-kernel_size // 2 + 1.0, kernel_size // 2 + 1.0) + xx, yy = paddle.meshgrid(ax, ax) + kernel = paddle.exp(-(xx**2 + yy**2) / (2.0 * sigma**2)) + kernel = kernel / paddle.sum(kernel) + return kernel + + +def gaussian_blur(image, kernel_size, sigma): + # 创建高斯核 + gaussian_kernel = create_gaussian_kernel(kernel_size, sigma) + + # 扩展维度以匹配卷积函数的权重格式 [Co, Ci, H, W] + gaussian_kernel = gaussian_kernel.reshape([1, 1, kernel_size, kernel_size]) + gaussian_kernel = paddle.tile( + gaussian_kernel, repeat_times=[image.shape[1], 1, 1, 1] + ) + + # 使用高斯核进行卷积 + # PaddlePaddle的conv2d需要输入的形状是[N, C, H, W],其中N是批量大小,C是通道数 + # 因此,可能需要调整输入tensor的形状以符合这个要求 + padding = kernel_size // 2 + blurred_image = paddle.nn.functional.conv2d( + image, weight=gaussian_kernel, stride=1, padding=padding, groups=image.shape[1] + ) + return blurred_image + + +def load_recons_data(ref_path, sample_path, data_kw, smoothing, smoothing_scale): + with np.load(sample_path, allow_pickle=True) as f: + sampled_data = f[data_kw][-4:, (...)].copy().astype(np.float32) + sampled_data = paddle.to_tensor(data=sampled_data, dtype="float32") + ref_data = np.load(ref_path).astype(np.float32) + data_mean, data_scale = np.mean(ref_data[:-4]), np.std(ref_data[:-4]) + ref_data = ref_data[-4:, (...)].copy().astype(np.float32) + ref_data = paddle.to_tensor(data=ref_data, dtype="float32") + flattened_sampled_data = [] + flattened_ref_data = [] + for i in range(ref_data.shape[0]): + for j in range(ref_data.shape[1] - 2): + flattened_ref_data.append(ref_data[(i), j : j + 3, (...)]) + flattened_sampled_data.append(sampled_data[(i), j : j + 3, (...)]) + flattened_ref_data = paddle.stack(x=flattened_ref_data, axis=0) + flattened_sampled_data = paddle.stack(x=flattened_sampled_data, axis=0) + if smoothing: + arr = flattened_sampled_data + ker_size = smoothing_scale + arr = paddle.nn.functional.pad( + arr, + pad=( + (ker_size - 1) // 2, + (ker_size - 1) // 2, + (ker_size - 1) // 2, + (ker_size - 1) // 2, + ), + mode="circular", + ) + arr = gaussian_blur(arr, ker_size, ker_size) + flattened_sampled_data = arr[ + (...), + (ker_size - 1) // 2 : -(ker_size - 1) // 2, + (ker_size - 1) // 2 : -(ker_size - 1) // 2, + ] + print(f"data shape: {flattened_ref_data.shape}") + return ( + flattened_ref_data, + flattened_sampled_data, + data_mean.item(), + data_scale.item(), + ) + + +class MinMaxScaler(object): + def __init__(self, min, max): + self.min = min + self.max = max + + def __call__(self, x): + return x - self.min + + def inverse(self, x): + return x * (self.max - self.min) + self.min + + def scale(self): + return self.max - self.min + + +class StdScaler(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, x): + return (x - self.mean) / self.std + + def inverse(self, x): + return x * self.std + self.mean + + def scale(self): + return self.std + + +def nearest_blur_image(data, scale): + blur_data = data[:, :, ::scale, ::scale] + return blur_data + + +def gaussian_blur_image(data, scale): + blur_data = paddle.visualization.transforms.GaussianBlur( + kernel_size=scale, sigma=2 * scale + 1 + )(data) + return blur_data + + +def random_square_hole_mask(data, hole_size): + h, w = data.shape[2:] + mask = paddle.zeros(shape=data.shape, dtype="int64").to(data.place) + hole_x = np.random.randint(0, w - hole_size) + hole_y = np.random.randint(0, h - hole_size) + mask[(...), hole_y : hole_y + hole_size, hole_x : hole_x + hole_size] = 1 + return mask + + +def make_image_grid(images, out_path, ncols=10): + t, h, w = images.shape + images = images.detach().cpu().numpy() + b = t // ncols + fig = plt.figure(figsize=(8.0, 8.0)) + grid = ImageGrid(fig, 111, nrows_ncols=(b, ncols)) + for ax, im_no in zip(grid, np.arange(b * ncols)): + ax.imshow(images[(im_no), :, :], cmap="twilight", vmin=-23, vmax=23) + ax.axis("off") + plt.savefig(out_path, bbox_inches="tight") + plt.close() + + +def ensure_dir(path): + if not os.path.exists(path): + os.makedirs(path) + + +def slice2sequence(data): + # 首先,选取第二个维度的特定切片 + selected_slice = data[:, 1:2, :, :] # 选取F维度的第二个切片,保持形状为 [T, 1, H, W] + + # 然后,如果你想要合并T和F维度,可以使用reshape + # 但在这个例子中,由于F维度只有一个元素,实际上你不需要合并,只需要去掉F这一维 + data = paddle.squeeze(selected_slice, axis=1) # 结果形状为 [T, H, W] + return data + + +def l1_loss(x, y): + return paddle.mean(x=paddle.abs(x=x - y)) + + +def l2_loss(x, y): + return ((x - y) ** 2).mean(axis=(-1, -2)).sqrt().mean() + + +def float_to_complex(x): + x = paddle.cast(x, dtype="float32") + img_x = paddle.zeros_like(x) + x = paddle.complex(x, img_x) + return x + + +def voriticity_residual(w, re=1000.0, dt=1 / 32, calc_grad=True): + w = w.clone() + w.stop_gradient = not True + nx = w.shape[2] + w_h = paddle.fft.fft2(x=w[:, 1:-1], axes=[2, 3]) + k_max = nx // 2 + N = nx + k_x = ( + paddle.concat( + x=( + paddle.arange(start=0, end=k_max, step=1), + paddle.arange(start=-k_max, end=0, step=1), + ), + axis=0, + ) + .reshape([N, 1]) + .tile(repeat_times=[1, N]) + .reshape([1, 1, N, N]) + ) + k_y = ( + paddle.concat( + x=( + paddle.arange(start=0, end=k_max, step=1), + paddle.arange(start=-k_max, end=0, step=1), + ), + axis=0, + ) + .reshape([1, N]) + .tile(repeat_times=[N, 1]) + .reshape([1, 1, N, N]) + ) + + lap = k_x**2 + k_y**2 + + k_x = float_to_complex(k_x) + k_y = float_to_complex(k_y) + + lap[..., 0, 0] = 1.0 + + lap = float_to_complex(lap) + + psi_h = w_h / lap + u_h = 1.0j * k_y * psi_h + v_h = -1.0j * k_x * psi_h + wx_h = 1.0j * k_x * w_h + wy_h = 1.0j * k_y * w_h + wlap_h = -lap * w_h + u = paddle.fft.irfft2(x=u_h[(...), :, : k_max + 1], axes=[2, 3]) + v = paddle.fft.irfft2(x=v_h[(...), :, : k_max + 1], axes=[2, 3]) + wx = paddle.fft.irfft2(x=wx_h[(...), :, : k_max + 1], axes=[2, 3]) + wy = paddle.fft.irfft2(x=wy_h[(...), :, : k_max + 1], axes=[2, 3]) + wlap = paddle.fft.irfft2(x=wlap_h[(...), :, : k_max + 1], axes=[2, 3]) + advection = u * wx + v * wy + wt = (w[:, 2:, :, :] - w[:, :-2, :, :]) / (2 * dt) + x = paddle.linspace(start=0, stop=2 * np.pi, num=nx + 1) + x = x[0:-1] + X, Y = paddle.meshgrid(x, x) + f = -4 * paddle.cos(x=4 * Y) + residual = wt + (advection - 1.0 / re * wlap + 0.1 * w[:, 1:-1]) - f + residual_loss = (residual**2).mean() + if calc_grad: + dw = paddle.grad(outputs=residual_loss, inputs=w)[0] + return dw, residual_loss + else: + return residual_loss + + +def l2_loss_fn(x, scaler, gt): + return l2_loss(scaler.inverse(x).to("gpu"), gt) + + +def equation_loss_fn(x, scaler): + return voriticity_residual(scaler.inverse(x), calc_grad=False) + + +def physical_gradient_func1(x, scaler): + return voriticity_residual(scaler.inverse(x))[0] / scaler.scale() + + +def physical_gradient_func2(x, scaler, config): + return ( + voriticity_residual(scaler.inverse(x))[0] + / scaler.scale() + * config.sampling.lambda_ + ) + + +def eval_input(model, config): + args = config.eval + os.makedirs(config.log_dir, exist_ok=True) + if config.model.type == "conditional": + dir_name = "recons_{}_t{}_r{}_w{}".format( + config.data.data_kw, + args.t, + args.reverse_steps, + config.sampling.guidance_weight, + ) + else: + dir_name = "recons_{}_t{}_r{}_lam{}".format( + config.data.data_kw, args.t, args.reverse_steps, config.sampling.lambda_ + ) + if config.model.type == "conditional": + print("Use residual gradient guidance during sampling") + dir_name = "guided_" + dir_name + elif config.sampling.lambda_ > 0: + print("Use residual gradient penalty during sampling") + dir_name = "pi_" + dir_name + else: + print("Not use physical gradient during sampling") + log_dir = os.path.join(config.log_dir, dir_name) + os.makedirs(log_dir, exist_ok=True) + logger = logging.getLogger("LOG") + logger.setLevel(logging.INFO) + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + file_handler = logging.FileHandler("%s/%s.txt" % (log_dir, "logging_info")) + file_handler.setLevel(logging.INFO) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + image_sample_dir = log_dir + device = "gpu" if paddle.device.cuda.device_count() >= 1 else "cpu" + config.place = str(paddle.CUDAPlace(0)) + betas = get_beta_schedule( + beta_start=config.diffusion.beta_start, + beta_end=config.diffusion.beta_end, + num_diffusion_timesteps=config.diffusion.num_diffusion_timesteps, + ) + betas = paddle.to_tensor(data=betas).astype(dtype="float32").to(device) + # alphas = 1.0 - betas + + # restuct + logger.info("Doing sparse reconstruction task") + logger.info("Loading model") + model.to("gpu") + logger.info("Model loaded") + model.eval() + logger.info("Preparing data") + ref_data, blur_data, data_mean, data_std = load_recons_data( + config.data.data_dir, + config.data.sample_data_dir, + config.data.data_kw, + smoothing=config.data.smoothing, + smoothing_scale=config.data.smoothing_scale, + ) + scaler = StdScaler(data_mean, data_std) + logger.info("Start sampling") + testset = paddle.io.TensorDataset([blur_data, ref_data]) + test_loader = paddle.io.DataLoader( + testset, + batch_size=config.sampling.batch_size, + shuffle=False, + num_workers=config.data.num_workers, + use_shared_memory=False, + ) + l2_loss_all = np.zeros((ref_data.shape[0], args.repeat_run, args.sample_step)) + residual_loss_all = np.zeros((ref_data.shape[0], args.repeat_run, args.sample_step)) + for batch_index, (blur_data, data) in enumerate(test_loader): + logger.info("Batch: {} / Total batch {}".format(batch_index, len(test_loader))) + x0 = blur_data.to(device) + gt = data.to(device) + logger.info("Preparing reference image") + logger.info("Dumping visualization...") + sample_folder = "sample_batch{}".format(batch_index) + ensure_dir(os.path.join(image_sample_dir, sample_folder)) + sample_img_filename = "input_image.png" + path_to_dump = os.path.join( + image_sample_dir, sample_folder, sample_img_filename + ) + x0_masked = x0.clone() + make_image_grid(slice2sequence(x0_masked), path_to_dump) + sample_img_filename = "reference_image.png" + path_to_dump = os.path.join( + image_sample_dir, sample_folder, sample_img_filename + ) + make_image_grid(slice2sequence(gt), path_to_dump) + if config.sampling.dump_arr: + np.save( + os.path.join(image_sample_dir, sample_folder, "input_arr.npy"), + slice2sequence(x0).cpu().numpy(), + ) + np.save( + os.path.join(image_sample_dir, sample_folder, "reference_arr.npy"), + slice2sequence(data).cpu().numpy(), + ) + l2_loss_init = l2_loss(x0, gt) + logger.info("L2 loss init: {}".format(l2_loss_init)) + gt_residual = voriticity_residual(gt)[1].detach() + init_residual = voriticity_residual(x0)[1].detach() + logger.info("Residual init: {}".format(init_residual)) + logger.info("Residual reference: {}".format(gt_residual)) + x0 = scaler(x0) + xinit = x0.clone() + if config.sampling.log_loss: + logger1 = MetricLogger( + {"l2 loss": l2_loss_fn, "residual loss": equation_loss_fn} + ) + for repeat in range(args.repeat_run): + logger.info(f"Run No.{repeat}:") + x0 = xinit.clone() + for it in range(args.sample_step): + e = paddle.randn(shape=x0.shape, dtype=x0.dtype) + total_noise_levels = int(args.t * 0.7**it) + a = (1 - betas).cumprod(dim=0) + x = ( + x0 * a[total_noise_levels - 1].sqrt() + + e * (1.0 - a[total_noise_levels - 1]).sqrt() + ) + if config.model.type == "conditional": + physical_gradient_func = physical_gradient_func1(x, scaler) + elif config.sampling.lambda_ > 0: + physical_gradient_func = physical_gradient_func2(x, scaler, config) + num_of_reverse_steps = int(args.reverse_steps * 0.7**it) + betas = betas.to(device) + skip = total_noise_levels // num_of_reverse_steps + seq = range(0, total_noise_levels, skip) + if config.model.type == "conditional": + xs, _ = guided_ddim_steps( + x, + seq, + model, + betas, + w=config.sampling.guidance_weight, + dx_func=physical_gradient_func, + cache=False, + logger=logger1, + ) + elif config.sampling.lambda_ > 0: + xs, _ = ddim_steps( + x, + seq, + model, + betas, + dx_func=physical_gradient_func, + cache=False, + logger=logger1, + ) + else: + xs, _ = ddim_steps( + x, seq, model, betas, cache=False, logger=logger1 + ) + x = xs[-1] + x0 = xs[-1] + l2_loss_f = l2_loss(scaler.inverse(x.clone()).to("gpu"), gt) + logger.info("L2 loss it{}: {}".format(it, l2_loss_f)) + residual_loss_f = voriticity_residual( + scaler.inverse(x.clone()), calc_grad=False + ).detach() + logger.info("Residual it{}: {}".format(it, residual_loss_f)) + l2_loss_all[ + batch_index * x.shape[0] : (batch_index + 1) * x.shape[0], + (repeat), + (it), + ] = l2_loss_f.item() + residual_loss_all[ + batch_index * x.shape[0] : (batch_index + 1) * x.shape[0], + (repeat), + (it), + ] = residual_loss_f.item() + if config.sampling.dump_arr: + np.save( + os.path.join( + image_sample_dir, + sample_folder, + f"sample_arr_run_{repeat}_it{it}.npy", + ), + slice2sequence(scaler.inverse(x)).cpu().numpy(), + ) + if config.sampling.log_loss: + logger1.log( + os.path.join(image_sample_dir, sample_folder), + f"run_{repeat}_it{it}", + ) + logger1.reset() + logger.info("Finished batch {}".format(batch_index)) + logger.info("========================================================") + logger.info("Finished sampling") + logger.info(f"mean l2 loss: {l2_loss_all[..., -1].mean()}") + logger.info(f"std l2 loss: {l2_loss_all[..., -1].std(axis=1).mean()}") + logger.info(f"mean residual loss: {residual_loss_all[..., -1].mean()}") + logger.info(f"std residual loss: {residual_loss_all[..., -1].std(axis=1).mean()}") diff --git a/examples/ddpm/main.py b/examples/ddpm/main.py new file mode 100644 index 0000000000..5aac165d99 --- /dev/null +++ b/examples/ddpm/main.py @@ -0,0 +1,117 @@ +from os import path as osp + +import functions +import hydra +import numpy as np +import paddle +import utils +from omegaconf import DictConfig + +import ppsci +from ppsci.utils import logger + + +def train(cfg): + # 设置PaddlePaddle的随机种子 + paddle.seed(1234) + + # 设置NumPy的随机种子 + np.random.seed(1234) + + # 生成随机数 + print(paddle.randn([1])) # [0.63745004] + print(np.random.rand(1)) # [0.19151945] + args = cfg.train + config = cfg + data = np.load(cfg.data.data_dir) + coords = data + # set random seed for reproducibility + ppsci.utils.misc.set_random_seed(args.seed) + # initialize logger + logger.init_logger("ppsci", osp.join(args.output_dir, f"{cfg.mode}.log"), "info") + model = ppsci.arch.DDPM(config=config) + ppsci.utils.load_pretrain(model, config.model.ckpt_path) + + def _transform_out(_out): + return utils.transform_out(_out) + + model.register_input_transform(utils.transform_in) + model.register_output_transform(_transform_out) + # x = paddle.ones(shape=[1, 3, 256, 256]) + # n = 1 + # # 创建一个值为15的张量 + # t = paddle.full(shape=[1], fill_value=15) + + # # 拼接t和1000-t-1,并取前n个元素 + # t = paddle.concat([t, 1000 - t - 1], axis=0)[:n] + # dx = paddle.ones(shape=[n, 3, 256, 256]) + # output = model.forward(x, t, dx) + + # print('Model output:') + # print(output) + # raise NotImplementedError("not supported") + sup_constraint_pde = ppsci.constraint.SupervisedConstraint( + { + "dataset": { + "name": "NamedArrayDataset", + "input": { + "x0": coords, + }, + }, + "batch_size": cfg.training.batch_size, + "num_workers": cfg.data.num_workers, + }, + ppsci.loss.FunctionalLoss(utils.train_loss_func), + { + "loss": lambda out: out["loss"], + }, + name="sup_train", + ) + constraint_pde = {sup_constraint_pde.name: sup_constraint_pde} + + # initialize solver + clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=config.optim.grad_clip) + optimizer = ppsci.optimizer.Adam(config.optim.lr, grad_clip=clip)(model) + solver = ppsci.solver.Solver( + model, + constraint_pde, + output_dir=args.output_dir, + optimizer=optimizer, + save_freq=2, + epochs=cfg.training.n_epochs, + iters_per_epoch=11448, + pretrained_model_path="/home/aistudio/data/data264003/latest.pdparams", + validator=None, + ) + + solver.train() + + +def evaluate(cfg): + config = cfg + if config.model.type == "conditional": + print("Using conditional model") + model = ppsci.arch.DDPM(config=config) + else: + # 原始代码中没有unconditional的model,所以这里直接raise NotImplemente + raise NotImplementedError("not supported") + ppsci.utils.load_pretrain(model, config.model.ckpt_path) + functions.eval_input(model, config) + + +@hydra.main( + version_base=None, + config_path="conf/", + config_name="kmflow_re1000_rs256_sparse_recons_conditional.yaml", +) +def main(cfg: DictConfig): + if cfg.mode == "train": + train(cfg) + elif cfg.mode == "eval": + evaluate(cfg) + else: + raise ValueError(f"cfg.mode should in ['train', 'eval'], but got '{cfg.mode}'") + + +if __name__ == "__main__": + main() diff --git a/examples/ddpm/utils.py b/examples/ddpm/utils.py new file mode 100644 index 0000000000..7ad70ea2f7 --- /dev/null +++ b/examples/ddpm/utils.py @@ -0,0 +1,223 @@ +import numpy as np +import paddle + + +def transform_in(x, config, p=0.1): + """ + 对输入数据进行变换,并返回变换后的数据、时间步长、残差和噪声。 + + Args: + x (dict): 包含输入数据的字典,键为 "x0",值为 Paddle Tensor。 + config (dict): 配置参数字典,包含扩散参数、数据路径等。 + p (float, optional): 概率值,用于决定是否计算残差。默认为 0.1。 + + Returns: + tuple: 包含四个元素的元组,分别为: + - x (Paddle Tensor): 变换后的数据。 + - t (Paddle Tensor): 时间步长。 + - dx (Paddle Tensor or None): 残差,如果 flag < p 则为 None。 + - e (Paddle Tensor): 噪声。 + + """ + x = x["x0"] + # print(x) + betas = get_beta_schedule_train( + beta_schedule=config.diffusion.beta_schedule, + beta_start=config.diffusion.beta_start, + beta_end=config.diffusion.beta_end, + num_diffusion_timesteps=config.diffusion.num_diffusion_timesteps, + ) + betas = paddle.to_tensor(data=betas).astype(dtype="float32") + num_timesteps = betas.shape[0] + b = betas + n = x.shape[0] + e = paddle.randn(shape=x.shape, dtype=x.dtype) + t = paddle.randint(low=0, high=num_timesteps, shape=(n // 2 + 1,)) + t = paddle.concat(x=[t, num_timesteps - t - 1], axis=0)[:n] + data = np.load(config.data.stat_path) + x_offset = data["mean"] + x_scale = data["scale"] + beta_sub = 1 - b + cumprod_beta = paddle.cumprod(beta_sub, dim=0) + if not isinstance(t, paddle.Tensor): + t = paddle.to_tensor(t, dtype="int64") + selected = paddle.gather(cumprod_beta, index=t, axis=0) + a = paddle.reshape(selected, shape=[-1, 1, 1, 1]) + x = x * a.sqrt() + e * (1.0 - a).sqrt() + flag = np.random.uniform(0, 1) + if flag < p: + dx = None + else: + dx = voriticity_residual_train(x * x_scale + x_offset) / x_scale + return x, t, dx, e + + +def transform_out(outputs): + loss = (outputs[1] - outputs[0]).square().sum(axis=(1, 2, 3)).mean(axis=0) + + return {"loss": loss} + + +def train_loss_func(result_dict, *args) -> paddle.Tensor: + """For model calculation of loss. + + Args: + result_dict (Dict[str, paddle.Tensor]): The result dict. + + Returns: + paddle.Tensor: Loss value. + """ + return result_dict["loss"] + + +def float_to_complex(x): + x = paddle.cast(x, dtype="float32") + img_x = paddle.zeros_like(x) + x = paddle.complex(x, img_x) + return x + + +def voriticity_residual_train(w, re=1000.0, dt=1 / 32): + w = w.clone() + out_5 = w + out_5.stop_gradient = not True + out_5 + nx = w.shape[2] + w_h = paddle.fft.fft2(x=w[:, 1:-1], axes=[2, 3]) + k_max = nx // 2 + N = nx + k_x = ( + paddle.concat( + x=( + paddle.arange(start=0, end=k_max, step=1), + paddle.arange(start=-k_max, end=0, step=1), + ), + axis=0, + ) + .reshape([N, 1]) + .tile(repeat_times=[1, N]) + .reshape([1, 1, N, N]) + ) + k_y = ( + paddle.concat( + x=( + paddle.arange(start=0, end=k_max, step=1), + paddle.arange(start=-k_max, end=0, step=1), + ), + axis=0, + ) + .reshape([1, N]) + .tile(repeat_times=[N, 1]) + .reshape([1, 1, N, N]) + ) + + lap = k_x**2 + k_y**2 + lap = float_to_complex(lap) + k_x = float_to_complex(k_x) + k_y = float_to_complex(k_y) + lap[..., 0, 0] = 1.0 + psi_h = w_h / lap + u_h = 1.0j * k_y * psi_h + v_h = -1.0j * k_x * psi_h + wx_h = 1.0j * k_x * w_h + wy_h = 1.0j * k_y * w_h + wlap_h = -lap * w_h + u = paddle.fft.irfft2(x=u_h[(...), :, : k_max + 1], axes=[2, 3]) + v = paddle.fft.irfft2(x=v_h[(...), :, : k_max + 1], axes=[2, 3]) + wx = paddle.fft.irfft2(x=wx_h[(...), :, : k_max + 1], axes=[2, 3]) + wy = paddle.fft.irfft2(x=wy_h[(...), :, : k_max + 1], axes=[2, 3]) + wlap = paddle.fft.irfft2(x=wlap_h[(...), :, : k_max + 1], axes=[2, 3]) + advection = u * wx + v * wy + wt = (w[:, 2:, :, :] - w[:, :-2, :, :]) / (2 * dt) + x = paddle.linspace(start=0, stop=2 * np.pi, num=nx + 1) + x = x[0:-1] + X, Y = paddle.meshgrid(x, x) + f = -4 * paddle.cos(x=4 * Y) + residual = wt + (advection - 1.0 / re * wlap + 0.1 * w[:, 1:-1]) - f + residual_loss = (residual**2).mean() + dw = paddle.grad(outputs=residual_loss, inputs=w)[0] + return dw + + +def get_beta_schedule_train( + beta_schedule, *, beta_start, beta_end, num_diffusion_timesteps +): + def sigmoid(x): + return 1 / (np.exp(-x) + 1) + + if beta_schedule == "quad": + betas = ( + np.linspace( + beta_start**0.5, + beta_end**0.5, + num_diffusion_timesteps, + dtype=np.float64, + ) + ** 2 + ) + elif beta_schedule == "linear": + betas = np.linspace( + beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64 + ) + elif beta_schedule == "const": + betas = beta_end * np.ones(num_diffusion_timesteps, dtype=np.float64) + elif beta_schedule == "jsd": + betas = 1.0 / np.linspace( + num_diffusion_timesteps, 1, num_diffusion_timesteps, dtype=np.float64 + ) + elif beta_schedule == "sigmoid": + betas = np.linspace(-6, 6, num_diffusion_timesteps) + betas = sigmoid(betas) * (beta_end - beta_start) + beta_start + else: + raise NotImplementedError(beta_schedule) + assert betas.shape == (num_diffusion_timesteps,) + return betas + + +class EMAHelper(object): + def __init__(self, mu=0.999): + self.mu = mu + self.shadow = {} + + def register(self, module): + if isinstance(module, paddle.DataParallel): + module = module.module + for name, param in module.named_parameters(): + if not param.stop_gradient: + self.shadow[name] = param.data.clone() + + def update(self, module): + if isinstance(module, paddle.DataParallel): + module = module.module + for name, param in module.named_parameters(): + if not param.stop_gradient: + self.shadow[name].data = ( + 1.0 - self.mu + ) * param.data + self.mu * self.shadow[name].data + + def ema(self, module): + if isinstance(module, paddle.DataParallel): + module = module.module + for name, param in module.named_parameters(): + if not param.stop_gradient: + param.data.copy_(self.shadow[name].data) + + def ema_copy(self, module): + if isinstance(module, paddle.DataParallel): + inner_module = module.module + module_copy = type(inner_module)(inner_module.config).to( + inner_module.config.device + ) + module_copy.set_state_dict(state_dict=inner_module.state_dict()) + module_copy = paddle.DataParallel(module_copy) + else: + module_copy = type(module)(module.config).to(module.config.device) + module_copy.set_state_dict(state_dict=module.state_dict()) + self.ema(module_copy) + return module_copy + + def state_dict(self): + return self.shadow + + def load_state_dict(self, state_dict): + self.shadow = state_dict diff --git a/ppsci/arch/__init__.py b/ppsci/arch/__init__.py index bcb0bfcc97..8a6219df49 100644 --- a/ppsci/arch/__init__.py +++ b/ppsci/arch/__init__.py @@ -46,10 +46,12 @@ from ppsci.arch.sfnonet import SFNONet # isort:skip from ppsci.arch.tfnonet import TFNO1dNet, TFNO2dNet, TFNO3dNet # isort:skip from ppsci.arch.unonet import UNONet # isort:skip +from ppsci.arch.ddpm import DDPM # isort:skip from ppsci.arch.cuboid_transformer import CuboidTransformer # isort:skip from ppsci.utils import logger # isort:skip + __all__ = [ "Arch", "AMGNet", @@ -84,6 +86,7 @@ "UNONet", "build_model", "CFDGCN", + "DDPM", ] diff --git a/ppsci/arch/ddpm.py b/ppsci/arch/ddpm.py new file mode 100644 index 0000000000..c985be1f47 --- /dev/null +++ b/ppsci/arch/ddpm.py @@ -0,0 +1,411 @@ +import math + +import paddle + +from ppsci.arch import base + + +def get_timestep_embedding(timesteps, embedding_dim): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: + From Fairseq. + Build sinusoidal embeddings. + This matches the implementation in tensor2tensor, but differs slightly + from the description in Section 3.5 of "Attention Is All You Need". + """ + assert len(timesteps.shape) == 1 + half_dim = embedding_dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = paddle.exp(x=paddle.arange(dtype="float32", end=half_dim) * -emb) + emb = emb.to(device="gpu") + emb = timesteps.astype(dtype="float32")[:, (None)] * emb[(None), :] + emb = paddle.concat(x=[paddle.sin(x=emb), paddle.cos(x=emb)], axis=1) + if embedding_dim % 2 == 1: + emb = paddle.nn.functional.pad(emb, pad=(0, 1, 0, 0), mode="constant", value=0) + return emb + + +def nonlinearity(x): + return x * paddle.nn.functional.sigmoid(x=x) + + +def Normalize(in_channels): + return paddle.nn.GroupNorm( + num_groups=8, + num_channels=in_channels, + epsilon=1e-06, + weight_attr=True, + bias_attr=True, + ) + + +class Upsample(paddle.nn.Layer): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + self.conv = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ) + + def forward(self, x): + x = paddle.nn.functional.interpolate(x=x, scale_factor=2.0, mode="nearest") + if self.with_conv: + x = self.conv(x) + return x + + +class Downsample(paddle.nn.Layer): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + self.conv = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=0, + ) + + def forward(self, x): + if self.with_conv: + pad = (0, 1, 0, 1) + x = paddle.nn.functional.pad(x, pad=pad, mode="circular") + x = self.conv(x) + else: + x = paddle.nn.functional.avg_pool2d( + kernel_size=2, stride=2, x=x, exclusive=False + ) + return x + + +class ResnetBlock(paddle.nn.Layer): + def __init__( + self, + *, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout, + temb_channels=512 + ): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.norm1 = Normalize(in_channels) + self.conv1 = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ) + self.temb_proj = paddle.nn.Linear( + in_features=temb_channels, out_features=out_channels + ) + self.norm2 = Normalize(out_channels) + self.dropout = paddle.nn.Dropout(p=dropout) + self.conv2 = paddle.nn.Conv2D( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ) + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + self.conv_shortcut = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ) + else: + self.nin_shortcut = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + def forward(self, x, temb): + h = x + h = self.norm1(h) + h = nonlinearity(h) + h = self.conv1(h) + h = h + self.temb_proj(nonlinearity(temb))[:, :, (None), (None)] + h = self.norm2(h) + h = nonlinearity(h) + h = self.dropout(h) + h = self.conv2(h) + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + x = self.conv_shortcut(x) + else: + x = self.nin_shortcut(x) + return x + h + + +class AttnBlock(paddle.nn.Layer): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + self.norm = Normalize(in_channels) + self.q = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=1, + stride=1, + padding=0, + ) + self.k = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=1, + stride=1, + padding=0, + ) + self.v = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=1, + stride=1, + padding=0, + ) + self.proj_out = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + b, c, h, w = q.shape + q = q.reshape([b, c, h * w]) + q = q.transpose(perm=[0, 2, 1]) + k = k.reshape([b, c, h * w]) + w_ = paddle.bmm(x=q, y=k) + w_ = w_ * int(c) ** -0.5 + w_ = paddle.nn.functional.softmax(x=w_, axis=2) + v = v.reshape([b, c, h * w]) + w_ = w_.transpose(perm=[0, 2, 1]) + h_ = paddle.bmm(x=v, y=w_) + h_ = h_.reshape([b, c, h, w]) + h_ = self.proj_out(h_) + return x + h_ + + +class DDPM(base.Arch): + def __init__(self, config): + super(DDPM, self).__init__() + self.config = config + ch, out_ch, ch_mult = ( + config.model.ch, + config.model.out_ch, + tuple(config.model.ch_mult), + ) + num_res_blocks = config.model.num_res_blocks + attn_resolutions = config.model.attn_resolutions + dropout = config.model.dropout + in_channels = config.model.in_channels + resolution = config.data.image_size + resamp_with_conv = config.model.resamp_with_conv + num_timesteps = config.diffusion.num_diffusion_timesteps + if config.model.type == "bayesian": + out_3 = paddle.create_parameter( + shape=paddle.zeros(shape=num_timesteps).shape, + dtype=paddle.zeros(shape=num_timesteps).numpy().dtype, + default_initializer=paddle.nn.initializer.Assign( + paddle.zeros(shape=num_timesteps) + ), + ) + out_3.stop_gradient = not True + self.logvar = out_3 + self.ch = ch + self.temb_ch = self.ch * 4 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.temb = paddle.nn.Layer() + self.temb.dense = paddle.nn.LayerList( + sublayers=[ + paddle.nn.Linear(in_features=self.ch, out_features=self.temb_ch), + paddle.nn.Linear(in_features=self.temb_ch, out_features=self.temb_ch), + ] + ) + self.emb_conv = paddle.nn.Sequential( + paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=self.ch, + kernel_size=1, + stride=1, + padding=0, + ), + paddle.nn.GELU(), + paddle.nn.Conv2D( + in_channels=self.ch, + out_channels=self.ch, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ), + ) + self.conv_in = paddle.nn.Conv2D( + in_channels=in_channels, + out_channels=self.ch, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ) + self.combine_conv = paddle.nn.Conv2D( + in_channels=self.ch * 2, + out_channels=self.ch, + kernel_size=1, + stride=1, + padding=0, + ) + curr_res = resolution + in_ch_mult = (1,) + ch_mult + self.down = paddle.nn.LayerList() + block_in = None + for i_level in range(self.num_resolutions): + block = paddle.nn.LayerList() + attn = paddle.nn.LayerList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append( + ResnetBlock( + in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(AttnBlock(block_in)) + down = paddle.nn.Layer() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + self.mid = paddle.nn.Layer() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + self.mid.attn_1 = AttnBlock(block_in) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + self.up = paddle.nn.LayerList() + for i_level in reversed(range(self.num_resolutions)): + block = paddle.nn.LayerList() + attn = paddle.nn.LayerList() + block_out = ch * ch_mult[i_level] + skip_in = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + if i_block == self.num_res_blocks: + skip_in = ch * in_ch_mult[i_level] + block.append( + ResnetBlock( + in_channels=block_in + skip_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(AttnBlock(block_in)) + up = paddle.nn.Layer() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) if len(self.up) != 0 else self.up.append(up) + self.norm_out = Normalize(block_in) + self.conv_out = paddle.nn.Conv2D( + in_channels=block_in, + out_channels=out_ch, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + ) + + def forward(self, x, t=None, dx=None): + if self._input_transform is not None: + x, t, dx, e = self._input_transform(x, self.config) + assert x.shape[2] == x.shape[3] == self.resolution + temb = get_timestep_embedding(t, self.ch) + temb = self.temb.dense[0](temb) + temb = nonlinearity(temb) + temb = self.temb.dense[1](temb) + x = self.conv_in(x) + if dx is not None: + cond_emb = self.emb_conv(dx) + else: + cond_emb = paddle.zeros_like(x=x) + x = paddle.concat(x=(x, cond_emb), axis=1) + hs = [self.combine_conv(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1], temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + h = hs[-1] + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block]( + paddle.concat(x=[h, hs.pop()], axis=1), temb + ) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + if self._output_transform is not None: + outputs = (h, e) + result_dict = self._output_transform(outputs) + return result_dict + return h \ No newline at end of file