     'NAF',
     'FreeFormJacobianTransform',
     'CNF',
+    'ConvCouplingTransform',
+    'Glow',
 ]

 import abc
@@ -753,3 +755,197 @@ def __init__(
         )

         super().__init__(transforms, base)
+
+
+class ConvCouplingTransform(TransformModule):
+    r"""Creates a convolutional coupling transformation.
+
+    Arguments:
+        channels: The number of channels.
+        context: The number of context channels.
+        spatial: The number of spatial dimensions.
+        univariate: The univariate transformation constructor.
+        shapes: The shapes of the univariate transformation parameters.
+        kwargs: Keyword arguments passed to :class:`zuko.nn.FCN`.
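+
+    Example:
+        A minimal usage sketch; the channel, context and spatial sizes below
+        are arbitrary.
+
+        >>> t = ConvCouplingTransform(channels=16, context=8, spatial=2)
+        >>> x = torch.randn(16, 32, 32)
+        >>> y = torch.randn(8, 32, 32)
+        >>> z = t(y)(x)  # transforms x conditioned on the context y
+        >>> z.shape
+        torch.Size([16, 32, 32])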
+    """
+
+    def __init__(
+        self,
+        channels: int,
+        context: int = 0,
+        spatial: int = 2,
+        univariate: Callable[..., Transform] = MonotonicAffineTransform,
+        shapes: List[Size] = [(), ()],
+        **kwargs,
+    ):
+        super().__init__()
+
+        # The first d channels parameterize the transformation of the rest
+        self.d = channels // 2
+        # The channel dimension, counting from the right past the spatial dims
+        self.dim = -(spatial + 1)
+
+        # Univariate transformation
+        self.univariate = univariate
+        self.shapes = list(map(Size, shapes))
+        self.sizes = [s.numel() for s in self.shapes]
+
+        # Hyper network
+        kwargs.setdefault('activation', nn.ELU)
+        kwargs.setdefault('normalize', True)
+
+        self.hyper = FCN(
+            in_channels=self.d + context,
+            out_channels=(channels - self.d) * sum(self.sizes),
+            spatial=spatial,
+            **kwargs,
+        )
+
+    def extra_repr(self) -> str:
+        base = self.univariate(*map(torch.randn, self.shapes))
+
+        return f'(base): {base}'
+
+    def meta(self, y: Tensor, x: Tensor) -> Transform:
+        if y is not None:
+            x = torch.cat(broadcast(x, y, ignore=abs(self.dim)), dim=self.dim)
+
+        total = sum(self.sizes)
+
+        # Predict the parameters and reshape them to their expected shapes
+        phi = self.hyper(x)
+        phi = phi.unflatten(self.dim, (phi.shape[self.dim] // total, total))
+        phi = phi.movedim(self.dim, -1)
+        phi = phi.split(self.sizes, -1)
+        phi = (p.unflatten(-1, s + (1,)) for p, s in zip(phi, self.shapes))
+        phi = (p.squeeze(-1) for p in phi)
+
+        return self.univariate(*phi)
+
+    def forward(self, y: Tensor = None) -> Transform:
+        r"""
+        Arguments:
+            y: The context :math:`y`.
+
+        Returns:
+            A coupling transformation conditioned on :math:`y`.
+        """
+
+        return CouplingTransform(partial(self.meta, y), self.d, self.dim)
+
+
+class Glow(DistributionModule):
+    r"""Creates a Glow-like multi-scale flow.
+
+    References:
+        | Glow: Generative Flow with Invertible 1x1 Convolutions (Kingma et al., 2018)
+        | https://arxiv.org/abs/1807.03039
+
+    Arguments:
+        shape: The shape of a sample.
+        context: The number of context channels at each scale.
+        transforms: The number of coupling transformations at each scale.
+        kwargs: Keyword arguments passed to :class:`ConvCouplingTransform`.
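+
+    Example:
+        An illustrative sketch; the sample shape, number of scales and context
+        sizes are arbitrary. Each context tensor must match the spatial
+        resolution of its scale.
+
+        >>> flow = Glow((3, 32, 32), context=5, transforms=[2, 2])
+        >>> y = [torch.randn(5, 16, 16), torch.randn(5, 8, 8)]
+        >>> x = flow(y).sample()
+        >>> x.shape
+        torch.Size([3, 32, 32])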
+    """
+
+    def __init__(
+        self,
+        shape: Size,
+        context: Union[int, List[int]] = 0,
+        transforms: List[int] = [8, 8, 8],
+        **kwargs,
+    ):
+        super().__init__()
+
+        channels, *space = shape
+        spatial = len(space)
+        dim = -len(shape)
+        scales = len(transforms)
+
+        assert all(s % 2**scales == 0 for s in space), (
+            f"'shape' cannot be downscaled {scales} times"
+        )

+        if isinstance(context, int):
+            context = [context] * len(transforms)
+
+        self.flows = nn.ModuleList()
+        self.bases = nn.ModuleList()
+
+        for i, K in enumerate(transforms):
+            flow = []
+
+            # Space-to-depth: halve the spatial resolution, multiply the channels by 2^spatial
+            flow.append(Unconditional(PixelShuffleTransform, dim=dim))
+
+            channels = channels * 2**spatial
+            space = [s // 2 for s in space]
+
+            for _ in range(K):
+                flow.extend([
+                    Unconditional(
+                        PermutationTransform,
+                        torch.randperm(channels),
+                        dim=dim,
+                        buffer=True,
+                    ),
+                    Unconditional(
+                        LULinearTransform,
+                        torch.eye(channels),
+                        dim=dim,
+                    ),
+                    ConvCouplingTransform(
+                        channels=channels,
+                        context=context[i],
+                        spatial=spatial,
+                        **kwargs,
+                    ),
+                ])
+
+            self.flows.append(nn.ModuleList(flow))
+            self.bases.append(
+                Unconditional(
+                    DiagNormal,
+                    torch.zeros(channels // 2, *space),
+                    torch.ones(channels // 2, *space),
+                    ndims=spatial + 1,
+                    buffer=True,
+                )
+            )
+
+            channels = channels // 2
+
+        # At the coarsest scale, nothing is dropped: the base covers all remaining channels
+        self.bases.pop()
+        self.bases.append(
+            Unconditional(
+                DiagNormal,
+                torch.zeros(channels * 2, *space),
+                torch.ones(channels * 2, *space),
+                ndims=spatial + 1,
+                buffer=True,
+            )
+        )
+
+    def forward(self, y: Iterable[Tensor] = None) -> NormalizingFlow:
+        r"""
+        Arguments:
+            y: A sequence of contexts :math:`y`. There should be one element
+                :math:`y_i` per scale, but elements can be :py:`None`.
+
+        Returns:
+            A multi-scale flow :math:`p(X | y)`.
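+
+        Example:
+            A sketch of the unconditional case, assuming the flow was built
+            with :py:`context=0`; passing :py:`None` stands for "no context"
+            at every scale.
+
+            >>> flow = Glow((3, 32, 32), transforms=[2, 2])
+            >>> flow(None).log_prob(torch.randn(3, 32, 32)).shape
+            torch.Size([])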
+        """
+
+        if y is None:
+            y = [None] * len(self.flows)
+
+        # Transforms
+        transforms = []
+        context_shapes = []
+
+        for flow, base, y_i in zip(self.flows, self.bases, y):
+            for t in flow:
+                transforms.append(t(y_i))
+
+            transforms.append(DropTransform(base(y_i)))
+
+            if y_i is not None:
+                context_shapes.append(y_i.shape)
+
+        # Base
+        base = transforms.pop().dist
+        dim = -len(base.event_shape)
+
+        batch_shapes = (shape[:dim] for shape in context_shapes)
+        batch_shape = torch.broadcast_shapes(*batch_shapes)
+
+        base = base.expand(batch_shape)
+
+        return NormalizingFlow(transforms, base)