@@ -161,3 +161,144 @@ def data_partition(dataset_x, dataset_y, global_rank, world_size):
     idx_start = global_rank * data_per_rank
     idx_end = (global_rank + 1) * data_per_rank
     return dataset_x[idx_start:idx_end], dataset_y[idx_start:idx_end]
+
+
+def train_mnist_cnn(DIST=False,
+                    local_rank=None,
+                    world_size=None,
+                    nccl_id=None,
+                    spars=0,
+                    topK=False,
+                    corr=True):
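+    # DIST enables distributed training; nccl_id, local_rank and world_size
+    # configure the NCCL communicator. spars, topK and corr control DistOpt's
+    # sparse update: the sparsification value, top-K vs. threshold selection,
+    # and (presumably) local accumulation of the sparsification error.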
+
+    # Define the hyperparameters for the MNIST CNN
+    max_epoch = 10
+    batch_size = 64
+    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
+
+    # Prepare the training and validation data
+    train_x, train_y, test_x, test_y = load_dataset()
+    IMG_SIZE = 28
+    num_classes = 10
+    train_y = to_categorical(train_y, num_classes)
+    test_y = to_categorical(test_y, num_classes)
+
+    # Normalize pixel values to [0, 1]
+    train_x = train_x / 255
+    test_x = test_x / 255
+
+    if DIST:
+        # For distributed GPU training
+        sgd = opt.DistOpt(sgd,
+                          nccl_id=nccl_id,
+                          local_rank=local_rank,
+                          world_size=world_size)
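+        # DistOpt wraps the plain SGD so each backward pass all-reduces
+        # gradients across ranks; bind this process to the GPU that
+        # matches its local rank.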
+        dev = device.create_cuda_gpu_on(sgd.local_rank)
+
+        # Dataset partition for distributed training
+        train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
+                                          sgd.world_size)
+        test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
+                                        sgd.world_size)
+        world_size = sgd.world_size
+    else:
+        # For single GPU
+        dev = device.create_cuda_gpu()
+        world_size = 1
+
+    # Create model
+    model = CNN()
+
+    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
+    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
+    num_train_batch = train_x.shape[0] // batch_size
+    num_test_batch = test_x.shape[0] // batch_size
+    idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+    if DIST:
+        # Synchronize the initial parameters
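+        # Run one dummy batch forward and backward so every parameter is
+        # materialized, then synchronize() presumably all-reduces each
+        # parameter so all ranks start from identical weights.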
+        autograd.training = True
+        x = np.random.randn(batch_size, 1, IMG_SIZE,
+                            IMG_SIZE).astype(np.float32)
+        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
+        tx.copy_from_numpy(x)
+        ty.copy_from_numpy(y)
+        out = model.forward(tx)
+        loss = autograd.softmax_cross_entropy(out, ty)
+        for p, g in autograd.backward(loss):
+            synchronize(p, sgd)
+
+    # Training and evaluation loop
+    for epoch in range(max_epoch):
+        start_time = time.time()
+        np.random.shuffle(idx)
+
+        if (not DIST) or (sgd.global_rank == 0):
+            print('Starting Epoch %d:' % (epoch))
+
+        # Training phase
+        autograd.training = True
+        train_correct = np.zeros(shape=[1], dtype=np.float32)
+        test_correct = np.zeros(shape=[1], dtype=np.float32)
+        train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+        for b in range(num_train_batch):
+            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+            x = augmentation(x, batch_size)
+            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+            tx.copy_from_numpy(x)
+            ty.copy_from_numpy(y)
+            out = model.forward(tx)
+            loss = autograd.softmax_cross_entropy(out, ty)
+            train_correct += accuracy(tensor.to_numpy(out), y)
+            train_loss += tensor.to_numpy(loss)[0]
+            if DIST:
+                if spars == 0:
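+                    # Dense update: tensors with fewer elements than
+                    # `threshold` are presumably fused into one buffer
+                    # before all-reduce, cutting the number of NCCL calls.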
+                    sgd.backward_and_update(loss, threshold=50000)
+                else:
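+                    # Sparse update: communicate only significant gradient
+                    # entries (top-K if topK is True, otherwise by absolute
+                    # threshold), with corr presumably accumulating the
+                    # dropped residual locally.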
+                    sgd.backward_and_sparse_update(loss,
+                                                   spars=spars,
+                                                   topK=topK,
+                                                   corr=corr)
+            else:
+                sgd(loss)
+
+        if DIST:
+            # Reduce the evaluation accuracy and loss from multiple devices
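+            # reduce_variable presumably copies each host-side counter into
+            # the GPU reducer tensor, all-reduces it across ranks, and
+            # returns the summed result.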
+            reducer = tensor.Tensor((1,), dev, tensor.float32)
+            train_correct = reduce_variable(train_correct, sgd, reducer)
+            train_loss = reduce_variable(train_loss, sgd, reducer)
+
+        # Output the training loss and accuracy
+        if (not DIST) or (sgd.global_rank == 0):
+            print('Training loss = %f, training accuracy = %f' %
+                  (train_loss, train_correct /
+                   (num_train_batch * batch_size * world_size)),
+                  flush=True)
+
+        # Evaluation phase
+        autograd.training = False
+        for b in range(num_test_batch):
+            x = test_x[b * batch_size:(b + 1) * batch_size]
+            y = test_y[b * batch_size:(b + 1) * batch_size]
+            tx.copy_from_numpy(x)
+            ty.copy_from_numpy(y)
+            out_test = model.forward(tx)
+            test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+        if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+            test_correct = reduce_variable(test_correct, sgd, reducer)
+
+        # Output the evaluation accuracy
+        if (not DIST) or (sgd.global_rank == 0):
+            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+                  (test_correct / (num_test_batch * batch_size * world_size),
+                   time.time() - start_time),
+                  flush=True)
+
+
+if __name__ == '__main__':
+
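+    # Runs the single-GPU configuration by default. For distributed
+    # training, launch one process per GPU (e.g. via MPI) and call
+    # train_mnist_cnn with DIST=True, a shared nccl_id, and each
+    # process's local_rank and world_size.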
+    DIST = False
+    train_mnist_cnn(DIST=DIST)