classdef tblock < matlab.unittest.TestCase
    % tblock Unit tests for gpt2.layer.block
    
    % Copyright 2020 The MathWorks, Inc.
    
    properties(Constant, Access = private)
        block = @gpt2.layer.block
    end
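    
    % The tests below assume the block function has the signature
    %   [y, present] = gpt2.layer.block(x, pasts, weights, hyperParameters)
    % where x is a dlarray of activations, pasts is [] or cached key/value
    % matrices, weights is a struct of ln_*/attn_*/mlp_* parameters, and
    % hyperParameters carries NumHeads. This is inferred from how the tests
    % call the block, not from separate documentation of the layer.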
    
    properties(TestParameter)
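        % Each parameter exercises a different input shape; the feature
        % dimension C used in the tests is size(Input,1), i.e. 1, 1 and 3
        % for the scalar, vector and matrix cases respectively.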
        Input = struct(...
            'Scalar', 1, ...
            'Vector', 1:5, ...
            'Matrix', reshape(1:6, [3,2]))
    end
    
    methods(Test)
        function outputHasInputSize(test, Input)
            % The block is a composition of other layers. Simply verify
            % that the output of a block is the same size as its input, so
            % that blocks can be stacked (see the sketch after this
            % method).
            x = dlarray(Input);
            C = size(Input, 1);
            weights = test.randomWeights(C);
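            % A single attention head keeps the feature dimension C (1 or 3
            % for these inputs) trivially divisible by the head count.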
            hyperParameters.NumHeads = 1;
            y = test.block(x, [], weights, hyperParameters);
            test.verifySize(y, size(x));
        end
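        
        % A minimal stacking sketch (not executed here), assuming each
        % layer has its own weights struct, e.g. a hypothetical cell array
        % blockWeights with one element per layer:
        %   for i = 1:numel(blockWeights)
        %       x = gpt2.layer.block(x, [], blockWeights{i}, hyperParameters);
        %   end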
        
        function outputHasInputSizeWithPasts(test, Input)
            % As above but using "pasts" - a concatenation of key and value
            % matrices.
            x = dlarray(Input);
            C = size(Input, 1);
            weights = test.randomWeights(C);
            hyperParameters.NumHeads = 1;
            % Provide a fake past of sequence length 1
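            % The resulting past has size [C,1,1,2]: keys are stacked in
            % past(:,:,:,1) and values in past(:,:,:,2), each C-by-1 here
            % since there is a single head and a past sequence length of 1.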
            K_fake = dlarray(rand(C, 1));
            V_fake = dlarray(rand(C, 1));
            past = cat(4, K_fake, V_fake);
            [y, present] = test.block(x, past, weights, hyperParameters);
            test.verifySize(y, size(x));
            % The size of present is the size of past, except the sequence
            % dimension is extended by the sequence length of y.
            exp_present_size = size(past);
            exp_present_size(2) = exp_present_size(2) + size(y, 2);
            test.verifySize(present, exp_present_size);
        end
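        
        % Hedged sketch (not executed here) of how present would typically
        % be fed back as the next call's past during incremental decoding;
        % x_t and numSteps are illustrative names only:
        %   past = [];
        %   for t = 1:numSteps
        %       [y, past] = gpt2.layer.block(x_t, past, weights, hyperParameters);
        %   end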
    end
    
    methods(Access = private)
        function weights = randomWeights(test, C)
            % C is the number of features, i.e. the latent dimension of the
            % block.
            g1 = dlarray(rand(C, 1));
            b1 = dlarray(rand(C, 1));
            g2 = dlarray(rand(C, 1));
            b2 = dlarray(rand(C, 1));
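            % The attn_c_attn projection maps C features to 3*C, which
            % presumably packs the query, key and value projections
            % (following the GPT-2 weight layout); attn_c_proj maps back
            % from C to C.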
            W_A1 = dlarray(rand(3*C, C));
            W_A2 = dlarray(rand(C));
            b_A1 = dlarray(rand(3*C, 1));
            b_A2 = dlarray(rand(C, 1));
            W_P1 = dlarray(rand(C));
            b_P1 = dlarray(rand(C, 1));
            W_P2 = dlarray(rand(C));
            b_P2 = dlarray(rand(C, 1));
            weights = test.prepareBlockWeightsStruct(g1, b1, W_A1, b_A1, W_A2, b_A2, g2, b2, W_P1, b_P1, W_P2, b_P2);
        end
        
        function s = prepareBlockWeightsStruct(test, g1, b1, W_A1, b_A1, W_A2, b_A2, g2, b2, W_P1, b_P1, W_P2, b_P2)
            % Merge various structs that have the appropriate weight naming
            % syntax.
            s_ln = test.prepareLayerNormWeightsStruct(g1, b1, g2, b2);
            s_attn = test.prepareAttentionWeightsStruct(W_A1, b_A1, W_A2, b_A2);
            s_mlp = test.prepareMLPWeightsStruct(W_P1, b_P1, W_P2, b_P2);
            c = {s_ln, s_attn, s_mlp};
            fn = cellfun(@fieldnames, c, 'UniformOutput', false);
            fn = cat(1, fn{:});
            fv = cellfun(@struct2cell, c, 'UniformOutput', false);
            fv = cat(1, fv{:});
            s = struct();
            for i = 1:numel(fn)
                s.(fn{i}) = fv{i};
            end
        end
        
        function s = prepareAttentionWeightsStruct(~, W1, b1, W2, b2)
            % Prepare a struct compatible with the weights input of
            % attention. These are for the fully connected layers.
            s = struct(...
                'attn_c_attn_w_0', W1, ...
                'attn_c_attn_b_0', b1, ...
                'attn_c_proj_w_0', W2, ...
                'attn_c_proj_b_0', b2);
        end
        
        function s = prepareLayerNormWeightsStruct(~, g1, b1, g2, b2)
            % Prepare a struct of weights compatible with the two layer
            % norm calls in block.
            s = struct(...
                'ln_1_g_0', g1, ...
                'ln_1_b_0', b1, ...
                'ln_2_g_0', g2, ...
                'ln_2_b_0', b2);
        end
        
        function s = prepareMLPWeightsStruct(~, W1, b1, W2, b2)
            % Create a struct of weights to be consumed by
            % transformer.layer.multiLayerPerceptron
            s = struct(...
                'mlp_c_fc_w_0', W1, ...
                'mlp_c_fc_b_0', b1, ...
                'mlp_c_proj_w_0', W2, ...
                'mlp_c_proj_b_0', b2);
        end
    end
end