Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions DBN/dbnsetup.m
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
function dbn = dbnsetup(dbn, x, opts, types)
%DBNSETUP Initialise a Deep Belief Network as a stack of RBMs.
%   dbn   - struct whose field .sizes lists the hidden-layer sizes
%   x     - training data, one example per row (only size(x, 2) is used here)
%   opts  - training options passed through to rbmsetup (.alpha, .momentum,
%           .numepochs)
%   types - optional cell array of strings, one per layer, specifying the
%           type of RBM: 'gb' = Gaussian-Bernoulli, 'bb' = Bernoulli-
%           Bernoulli. Be aware that Gaussian-Bernoulli RBMs are
%           comparatively unstable and require a learning rate ~1/10th that
%           of a Bernoulli-Bernoulli RBM
%           (see https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf).

% The visible layer of the first RBM has one unit per input feature.
n = size(x, 2);
dbn.sizes = [n, dbn.sizes];

% Default to all-Bernoulli layers when types is absent or too short.
if nargin < 4 || length(types) < length(dbn.sizes) - 1
    types = repmat({'bb'}, 1, length(dbn.sizes) - 1);
end

% Per-layer field initialisation is delegated to rbmsetup.
for u = 1 : numel(dbn.sizes) - 1
    dbn.rbm{u} = rbmsetup(dbn.sizes(u), dbn.sizes(u + 1), types{u}, opts);
end

end
9 changes: 8 additions & 1 deletion DBN/rbmdown.m
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
function x = rbmdown(rbm, x)
%RBMDOWN Propagate hidden activations x down to the visible layer.
%   x is (examples x hidden); rbm.W is (hidden x visible); rbm.b is the
%   visible bias. Returns the expected visible activations, (examples x
%   visible).
expected = repmat(rbm.b', size(x, 1), 1) + x * rbm.W;
if strcmp(rbm.type, 'gb')
    % Gaussian-Bernoulli: visible units are linear, so the expectation is
    % the mean of the conditional normal.
    x = expected;
else
    % Assume Bernoulli-Bernoulli: expectation is the sigmoid of the
    % pre-activation.
    x = sigm(expected);
end
end
20 changes: 20 additions & 0 deletions DBN/rbmsetup.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
function rbm = rbmsetup(inputsize, hiddensize, type, opts)
%RBMSETUP Initialise a single Restricted Boltzmann Machine.
%   inputsize  - number of visible units
%   hiddensize - number of hidden units
%   type       - string specifying the type of RBM: 'gb' = Gaussian-
%                Bernoulli, 'bb' = Bernoulli-Bernoulli. Be aware that
%                Gaussian-Bernoulli RBMs are comparatively unstable and
%                require a learning rate ~1/10th that of a Bernoulli-
%                Bernoulli RBM
%                (see https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf).
%   opts       - struct providing .alpha, .momentum and .numepochs

rbm.type = type;
rbm.alpha = opts.alpha;
rbm.momentum = opts.momentum;
rbm.numepochs = opts.numepochs;

% Weights start as small zero-mean Gaussian noise (std 0.01), per Hinton's
% practical guide. 0.01*randn is equivalent to normrnd(0, 0.01) but avoids
% both the Statistics Toolbox dependency and the redundant zeros allocation.
rbm.W  = 0.01 * randn(hiddensize, inputsize);
rbm.vW = zeros(hiddensize, inputsize);

% Visible bias b, hidden bias c, and their momentum terms start at zero.
rbm.b  = zeros(inputsize, 1);
rbm.vb = zeros(inputsize, 1);

rbm.c  = zeros(hiddensize, 1);
rbm.vc = zeros(hiddensize, 1);
end
38 changes: 31 additions & 7 deletions DBN/rbmtrain.m
Original file line number Diff line number Diff line change
@@ -1,20 +1,44 @@
function rbm = rbmtrain(rbm, x, opts)
% RBMTRAIN Train an RBM with one-step contrastive divergence.
% NOTE(review): this span is a raw diff fragment — pre-change lines (the old
% asserts, old numbatches/epoch-loop, old v2 and disp lines) are interleaved
% with the new ones, and the "Expand All" marker below hides unchanged
% lines, so this text is not directly runnable as-is.
assert(isfloat(x), 'x must be a float');
assert(all(x(:)>=0) && all(x(:)<=1), 'all data in x must be in [0:1]');

% New range check: 'gb' RBMs take real-valued (standardised) input, so the
% [0,1] constraint only applies to the Bernoulli case.
assert(strcmp(rbm.type,'gb') || all(x(:)>=0) && all(x(:)<=1), 'all data in x must be in [0:1], unless gb rbm');
m = size(x, 1);
numbatches = m / opts.batchsize;

assert(rem(numbatches, 1) == 0, 'numbatches not integer');

for i = 1 : opts.numepochs
% NOTE(review): this condition looks inverted — ~mod(m, opts.batchsize) is
% true exactly when m IS evenly divisible by the batch size, so the warning
% fires in the wrong case; it should probably read:
%   if mod(m, opts.batchsize) ~= 0
if (~mod(m,opts.batchsize))
warning('WRX:BATCH','training data could not be divided into even batchs. Some data was disguarded');
end

% Any incomplete trailing batch is dropped by flooring.
numbatches = floor(m / opts.batchsize);



if(strcmp(rbm.type,'gb'))
% 'gb' inputs must be standardised featurewise: variance 0 (constant
% feature) or ~1, within a 0.01 tolerance.
v = var(x);
assert(all(v(v~=0)<=1+0.01) && all(v(v~=0)>=1-0.01), 'for gb rbm: x must have featurewise variance of 0 or 1');
else
% Assume type = 'bb'.
assert(all(x(:)>=0) && all(x(:)<=1), 'for bb rbm: all data in x must be in [0:1]');
end

for i = 1 : rbm.numepochs
kk = randperm(m);
err = 0;
for l = 1 : numbatches
batch = x(kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize), :);

v1 = batch;
h1 = sigmrnd(repmat(rbm.c', opts.batchsize, 1) + v1 * rbm.W');
v2 = sigmrnd(repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W);

if strcmp(rbm.type,'gb')
% Gaussian-Bernoulli: reconstruct the visible units by sampling a
% unit-variance normal around the conditional mean.
mu = (repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W);
v2 = normrnd(mu,1.00);
else
% Bernoulli-Bernoulli: sample the visible units through the sigmoid.
v2 = sigmrnd(repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W);
end

h2 = sigm(repmat(rbm.c', opts.batchsize, 1) + v2 * rbm.W');

c1 = h1' * v1;
Expand All @@ -31,7 +55,7 @@
err = err + sum(sum((v1 - v2) .^ 2)) / opts.batchsize;
end

disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Average reconstruction error is: ' num2str(err / numbatches)]);
disp(['epoch ' num2str(i) '/' num2str( rbm.numepochs) '. Average reconstruction error is: ' num2str(err / numbatches)]);

end
end
4 changes: 3 additions & 1 deletion REFS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ Deep Belief Nets
----------------

* ["A Fast Learning Algorithm for Deep Belief Nets"](http://www.cs.toronto.edu/~hinton/absps/ncfast.pdf) Geoffrey Hinton 2006 - Introduces contrastive divergence and DBNs
* ["A Practical Guide to Training Restricted Boltzmann Machines"](http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf) Geoffrey Hinton 2010 - How to implement DBNs
* ["Greedy layer-wise training of deep networks."](http://oldbooks.nips.cc/papers/files/nips19/NIPS2006_0739.pdf) Yoshua Bengio, Pascal Lamblin, Dan Popovici, and Hugo Larochelle 2007 - Further details of DBNs, including Gaussian Bernoulli units.
* ["A Practical Guide to Training Restricted Boltzmann Machines"](http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf) Geoffrey Hinton 2010 - Advice in hyper-parameter tuning for DBNs


Convolutional Neural Nets
-------------------------
Expand Down
34 changes: 34 additions & 0 deletions tests/test_example_DBN.m
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,40 @@
opts.numepochs = 1;
opts.batchsize = 100;
nn = nntrain(nn, train_x, train_y, opts);

[er, bad] = nntest(nn, test_x, test_y);
fprintf('dbn ex2 er:%f\n', er);
assert(er < 0.10, 'Too big error');


%% ex3 train a 100-100 hidden unit DBN, with a gaussian input layer and use its weights to initialize a NN, with a softmax output
rand('state',0)

% normalize Inputs (the Gaussian input layer requires unit variance)
[train_x, mu, sigma] = zscore(train_x);
test_x = normalize(test_x, mu, sigma);


%train dbn
dbn.sizes = [100 100];
opts.numepochs = 1;
opts.batchsize = 100;
opts.momentum = 0.9;
opts.alpha = 0.1; %set default for all layers
dbn = dbnsetup(dbn, train_x, opts, {'gb','bb'});
dbn.rbm{1}.alpha = 0.003; %Gaussian RBMs are unstable for large learning rates, so drop this down a bit compared to the other layers.

dbn = dbntrain(dbn, train_x, opts);

%unfold dbn to nn
nn = dbnunfoldtonn(dbn, 10);
nn.activation_function = 'sigm';
nn.output = 'softmax'; % use softmax output

%train nn
nn = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);
fprintf('dbn ex3 er:%f\n', er);
assert(er < 0.10, 'Too big error');