Skip to content

Commit a950ee7

Browse files
author
Justin Tan (unimelb)
committed
refactor everything
2 parents 0e9d010 + 68694d6 commit a950ee7

File tree

5 files changed

+17
-23
lines changed

5 files changed

+17
-23
lines changed

src/compression/ans.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ def push(x, starts, freqs, precisions):
6060
precision: Determines normalization factor of probability distribution.
6161
"""
6262
head, tail = x
63-
6463
assert head.shape == starts.shape == freqs.shape, (
6564
f"Inconsistent encoder shapes! head: {head.shape} | "
6665
f"starts: {starts.shape} | freqs: {freqs.shape}")

src/compression/entropy_coding.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
OVERFLOW_WIDTH = 4
99
OVERFLOW_CODE = 1 << (1 << OVERFLOW_WIDTH)
10-
PATCH_SIZE = (1,2)
10+
PATCH_SIZE = (1,1)
1111

1212
import torch
1313
import numpy as np
@@ -57,11 +57,10 @@ def _enc_statfun(value):
5757
# (coding_shape) = (C,H,W) by default but canbe generalized
5858
# cdf_i: [(coding_shape), pmf_length + 2]
5959
# value: [(coding_shape)]
60-
lower = np.squeeze(np.take_along_axis(cdf_i,
61-
np.expand_dims(value, -1), axis=-1))
62-
upper = np.squeeze(np.take_along_axis(cdf_i,
63-
np.expand_dims(value + 1, -1), axis=-1))
64-
60+
lower = np.take_along_axis(cdf_i,
61+
np.expand_dims(value, -1), axis=-1)[..., 0]
62+
upper = np.take_along_axis(cdf_i,
63+
np.expand_dims(value + 1, -1), axis=-1)[..., 0]
6564
return lower, upper - lower
6665

6766
return _enc_statfun
@@ -280,14 +279,11 @@ def vec_ans_index_encoder(symbols, indices, cdf, cdf_length, cdf_offset, precisi
280279
if B == 1:
281280
# Vectorize on patches - there's probably a way to interlace patches with
282281
# batch elements for B > 1 ...
283-
print('og', values.shape)
284282
if ((symbols_shape[2] % PATCH_SIZE[0] == 0) and (symbols_shape[3] % PATCH_SIZE[1] == 0)) is False:
285283
values = utils.pad_factor(torch.Tensor(values), symbols_shape[2:],
286284
factor=PATCH_SIZE).cpu().numpy().astype(np.int32)
287285
indices = utils.pad_factor(torch.Tensor(indices), symbols_shape[2:],
288286
factor=PATCH_SIZE).cpu().numpy().astype(np.int32)
289-
print(values.shape)
290-
print(symbols.shape)
291287

292288
assert (values.shape[2] % PATCH_SIZE[0] == 0) and (values.shape[3] % PATCH_SIZE[1] == 0)
293289
assert (indices.shape[2] % PATCH_SIZE[0] == 0) and (indices.shape[3] % PATCH_SIZE[1] == 0)

src/compression/prior_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,15 +317,15 @@ def forward(self, x, mean, scale, **kwargs):
317317

318318
import time
319319

320-
n_channels = 64
320+
n_channels = 256
321321
use_blocks = True
322322
vectorize = True
323323
prior_density = PriorDensity(n_channels)
324324
prior_entropy_model = PriorEntropyModel(distribution=prior_density)
325325

326326
loc, scale = 2.401, 3.43
327327
n_data = 1
328-
toy_shape = (n_data, n_channels, 149, 175)
328+
toy_shape = (n_data, n_channels, 34, 50)
329329
bottleneck, means = torch.randn(toy_shape), torch.randn(toy_shape)
330330
scales = torch.randn(toy_shape) * np.sqrt(scale) + loc
331331
scales = torch.clamp(scales, min=MIN_SCALE)

src/hyperprior.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class Hyperprior(CodingModel):
143143

144144
def __init__(self, bottleneck_capacity=220, hyperlatent_filters=LARGE_HYPERLATENT_FILTERS,
145145
mode='large', likelihood_type='gaussian', scale_lower_bound=MIN_SCALE, entropy_code=False,
146-
vectorize_encoding=False, block_encode=True):
146+
vectorize_encoding=True, block_encode=True):
147147

148148
"""
149149
Introduces probabilistic model over latents of

src/model.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ def compression_forward(self, x):
133133
if self.model_mode == ModelModes.EVALUATION and (self.training is False):
134134
n_encoder_downsamples = self.Encoder.n_downsampling_layers
135135
factor = 2 ** n_encoder_downsamples
136-
self.logger.info('Padding input image by {}'.format(factor))
137136
x = utils.pad_factor(x, x.size()[2:], factor)
138137

139138
# Encoder forward pass
@@ -142,7 +141,6 @@ def compression_forward(self, x):
142141
if self.model_mode == ModelModes.EVALUATION and (self.training is False):
143142
n_hyperencoder_downsamples = self.Hyperprior.analysis_net.n_downsampling_layers
144143
factor = 2 ** n_hyperencoder_downsamples
145-
self.logger.info('Padding latents by {}'.format(factor))
146144
y = utils.pad_factor(y, y.size()[2:], factor)
147145

148146
hyperinfo = self.Hyperprior(y, spatial_shape=x.size()[2:])
@@ -281,7 +279,6 @@ def compress(self, x):
281279
if self.model_mode == ModelModes.EVALUATION and (self.training is False):
282280
n_encoder_downsamples = self.Encoder.n_downsampling_layers
283281
factor = 2 ** n_encoder_downsamples
284-
self.logger.info('Padding input image to {}'.format(factor))
285282
x = utils.pad_factor(x, x.size()[2:], factor)
286283

287284
# Encoder forward pass
@@ -290,21 +287,23 @@ def compress(self, x):
290287
if self.model_mode == ModelModes.EVALUATION and (self.training is False):
291288
n_hyperencoder_downsamples = self.Hyperprior.analysis_net.n_downsampling_layers
292289
factor = 2 ** n_hyperencoder_downsamples
293-
self.logger.info('Padding latents to {}'.format(factor))
294290
y = utils.pad_factor(y, y.size()[2:], factor)
295291

296292
compression_output = self.Hyperprior.compress_forward(y, spatial_shape)
297293
attained_hbpp = 32 * len(compression_output.hyperlatents_encoded) / np.prod(spatial_shape)
298294
attained_lbpp = 32 * len(compression_output.latents_encoded) / np.prod(spatial_shape)
299295
attained_bpp = 32 * ((len(compression_output.hyperlatents_encoded) +
300296
len(compression_output.latents_encoded)) / np.prod(spatial_shape))
301-
print('BPP', compression_output.total_bpp)
302-
print('h BPP', compression_output.hyperlatent_bpp)
303-
print('l BPP', compression_output.latent_bpp)
304297

305-
print('Actual BPP', attained_bpp)
306-
print('h BPP', attained_hbpp)
307-
print('l BPP', attained_lbpp)
298+
self.logger.info('[ESTIMATED]')
299+
self.logger.info(f'BPP: {compression_output.total_bpp:.3f}')
300+
self.logger.info(f'HL BPP: {compression_output.hyperlatent_bpp:.3f}')
301+
self.logger.info(f'L BPP: {compression_output.latent_bpp:.3f}')
302+
303+
self.logger.info('[ATTAINED]')
304+
self.logger.info(f'BPP: {attained_bpp:.3f}')
305+
self.logger.info(f'HL BPP: {attained_hbpp:.3f}')
306+
self.logger.info(f'L BPP: {attained_lbpp:.3f}')
308307
return compression_output
309308

310309

0 commit comments

Comments
 (0)