Skip to content

Commit 71f4aad

Browse files
committed
lab2 pt student
1 parent b3a204e commit 71f4aad

File tree

3 files changed

+67
-141
lines changed

3 files changed

+67
-141
lines changed

lab2/PT_Part1_MNIST.ipynb

Lines changed: 41 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
" <td align=\"center\"><a target=\"_blank\" href=\"http://introtodeeplearning.com\">\n",
1111
" <img src=\"https://i.ibb.co/Jr88sn2/mit.png\" style=\"padding-bottom:5px;\" />\n",
1212
" Visit MIT Deep Learning</a></td>\n",
13-
" <td align=\"center\"><a target=\"_blank\" href=\"https://colab.research.google.com/github/aamini/introtodeeplearning/blob/master/lab2/solutions/PT_Part1_MNIST_Solution.ipynb\">\n",
13+
" <td align=\"center\"><a target=\"_blank\" href=\"https://colab.research.google.com/github/MITDeepLearning/introtodeeplearning/blob/master/lab2/PT_Part1_MNIST.ipynb\">\n",
1414
" <img src=\"https://i.ibb.co/2P3SLwK/colab.png\" style=\"padding-bottom:5px;\" />Run in Google Colab</a></td>\n",
15-
" <td align=\"center\"><a target=\"_blank\" href=\"https://github.com/aamini/introtodeeplearning/blob/master/lab2/solutions/PT_Part1_MNIST_Solution.ipynb\">\n",
15+
" <td align=\"center\"><a target=\"_blank\" href=\"https://github.com/MITDeepLearning/introtodeeplearning/blob/master/lab2/PT_Part1_MNIST.ipynb\">\n",
1616
" <img src=\"https://i.ibb.co/xfJbPmL/github.png\" height=\"70px\" style=\"padding-bottom:5px;\" />View Source on GitHub</a></td>\n",
1717
"</table>\n",
1818
"\n",
@@ -252,13 +252,9 @@
252252
"\n",
253253
" # '''TODO: Define the activation function for the first fully connected (Dense/Linear) layer.'''\n",
254254
" nn.Linear(28 * 28, 128),\n",
255-
" nn.ReLU(),\n",
256-
" # activation function = '''TODO'''\n",
255+
" '''TODO'''\n",
257256
"\n",
258-
" # '''TODO: Define the second Linear layer to output the classification probabilities'''\n",
259-
" nn.Linear(128, 10),\n",
260-
" nn.Softmax(dim=1) # Softmax activation for probabilities\n",
261-
" # '''TODO: Linear layer and activation to output classification probabilities'''\n",
257+
" '''TODO: Define the second Linear layer to output the classification probabilities'''\n",
262258
" )\n",
263259
" return fc_model\n",
264260
"\n",
@@ -321,27 +317,17 @@
321317
" self.fc1 = nn.Linear(28 * 28, 128)\n",
322318
"\n",
323319
" # '''TODO: Define the activation function for the first fully connected layer'''\n",
324-
" self.relu = nn.ReLU()\n",
320+
" self.relu = # TODO\n",
325321
"\n",
326322
" # '''TODO: Define the second Linear layer to output the classification probabilities'''\n",
327-
" self.fc2 = nn.Linear(128, 10)\n",
328-
" self.softmax = nn.Softmax(dim=1)\n",
329-
" # self.fc2 = # TODO\n",
330-
" # self.softmax = # TODO\n",
323+
" self.fc2 = # TODO\n",
331324
"\n",
332325
" def forward(self, x):\n",
333326
" x = self.flatten(x)\n",
334327
" x = self.fc1(x)\n",
335328
"\n",
336329
" # '''TODO: Implement the rest of forward pass of the model using the layers you have defined above'''\n",
337-
" x = self.relu(x)\n",
338-
" x = self.fc2(x)\n",
339-
" # '''TODO'''\n",
340-
"\n",
341-
" '''NOTE: In Pytorch, softmax is omitted in training since CrossEntropyLoss includes\n",
342-
" LogSoftmax; using both would result in incorrect loss values.\n",
343-
" Since we will train with CrossEntropyLoss, we do not need something like:\n",
344-
" x = self.softmax(x) '''\n",
330+
" '''TODO'''\n",
345331
"\n",
346332
" return x\n",
347333
"\n",
@@ -430,15 +416,18 @@
430416
" for images, labels in trainset_loader:\n",
431417
" # Move tensors to GPU so compatible with model\n",
432418
" images, labels = images.to(device), labels.to(device)\n",
433-
" # Clear gradients before performing backward pass\n",
434-
" optimizer.zero_grad()\n",
419+
"\n",
435420
" # Forward pass\n",
436421
" outputs = fc_model(images)\n",
422+
"\n",
423+
" # Clear gradients before performing backward pass\n",
424+
" optimizer.zero_grad()\n",
437425
" # Calculate loss based on model predictions\n",
438426
" loss = loss_function(outputs, labels)\n",
439427
" # Backpropagate and update model parameters\n",
440428
" loss.backward()\n",
441429
" optimizer.step()\n",
430+
"\n",
442431
" # multiply loss by total nos. of samples in batch\n",
443432
" total_loss += loss.item()*images.size(0)\n",
444433
"\n",
@@ -463,8 +452,7 @@
463452
"source": [
464453
"# TODO: Train the model by calling the function appropriately\n",
465454
"EPOCHS = 5\n",
466-
"train(fc_model, trainset_loader, loss_function, optimizer, EPOCHS)\n",
467-
"# train('''TODO''') # TODO\n",
455+
"train('''TODO''') # TODO\n",
468456
"\n",
469457
"comet_model_1.end()"
470458
]
@@ -512,39 +500,33 @@
512500
" with torch.no_grad():\n",
513501
" for images, labels in testset_loader:\n",
514502
" # TODO: ensure evalaution happens on the GPU\n",
515-
" images, labels = images.to(device), labels.to(device)\n",
516-
" # images, labels = # TODO\n",
503+
" images, labels = # TODO\n",
517504
"\n",
518505
" # TODO: feed the images into the model and obtain the predictions (forward pass)\n",
519-
" outputs = model(images)\n",
520-
" # outputs = # TODO\n",
506+
" outputs = # TODO\n",
521507
"\n",
522508
" loss = loss_function(outputs, labels)\n",
523509
"\n",
524510
" # TODO: Calculate test loss\n",
525-
" test_loss += loss.item() * images.size(0)\n",
526-
" # test_loss += # TODO\n",
511+
" test_loss += # TODO\n",
527512
"\n",
528513
" '''TODO: make a prediction and determine whether it is correct!'''\n",
529514
" # TODO: identify the digit with the highest probability prediction for the images in the test dataset.\n",
530-
" predicted = torch.argmax(outputs, dim=1)\n",
531-
" # predicted = # TODO\n",
515+
" predicted = # torch.argmax('''TODO''')\n",
532516
"\n",
533517
" # TODO: tally the number of correct predictions\n",
534-
" correct_pred += (predicted == labels).sum().item()\n",
535-
" # correct_pred += TODO\n",
518+
" correct_pred += TODO\n",
519+
"\n",
536520
" # TODO: tally the total number of predictions\n",
537-
" total_pred += labels.size(0)\n",
538-
" # total_pred += TODO\n",
521+
" total_pred += TODO\n",
539522
"\n",
540523
" # Compute average loss and accuracy\n",
541524
" test_loss /= total_pred\n",
542525
" test_acc = correct_pred / total_pred\n",
543526
" return test_loss, test_acc\n",
544527
"\n",
545528
"# TODO: call the evaluate function to evaluate the trained model!!\n",
546-
"test_loss, test_acc = evaluate(fc_model, trainset_loader, loss_function)\n",
547-
"# test_loss, test_acc = # TODO\n",
529+
"test_loss, test_acc = # TODO\n",
548530
"\n",
549531
"print('Test accuracy:', test_acc)"
550532
]
@@ -607,29 +589,24 @@
607589
" def __init__(self):\n",
608590
" super(CNN, self).__init__()\n",
609591
" # TODO: Define the first convolutional layer\n",
610-
" self.conv1 = nn.Conv2d(1, 24, kernel_size=3)\n",
611-
" # self.conv1 = # TODO\n",
592+
" self.conv1 = # TODO\n",
612593
"\n",
613594
" # TODO: Define the first max pooling layer\n",
614-
" self.pool1 = nn.MaxPool2d(kernel_size=2)\n",
615-
" # self.pool1 = # TODO\n",
595+
" self.pool1 = # TODO\n",
616596
"\n",
617597
" # TODO: Define the second convolutional layer\n",
618-
" self.conv2 = nn.Conv2d(24, 36, kernel_size=3)\n",
619-
" # self.conv2 = # TODO\n",
598+
" self.conv2 = # TODO\n",
620599
"\n",
621600
" # TODO: Define the second max pooling layer\n",
622-
" self.pool2 = nn.MaxPool2d(kernel_size=2)\n",
623-
" # self.pool2 = # TODO\n",
601+
" self.pool2 = # TODO\n",
624602
"\n",
625603
" self.flatten = nn.Flatten()\n",
626604
" self.fc1 = nn.Linear(36 * 5 * 5, 128)\n",
627605
" self.relu = nn.ReLU()\n",
628606
"\n",
629607
" # TODO: Define the Linear layer that outputs the classification\n",
630608
" # logits over class labels. Remember that CrossEntropyLoss operates over logits.\n",
631-
" self.fc2 = nn.Linear(128, 10)\n",
632-
" # self.fc2 = # TODO\n",
609+
" self.fc2 = # TODO\n",
633610
"\n",
634611
"\n",
635612
" def forward(self, x):\n",
@@ -640,18 +617,7 @@
640617
"\n",
641618
" # '''TODO: Implement the rest of forward pass of the model using the layers you have defined above'''\n",
642619
" # '''hint: this will involve another set of convolutional/pooling layers and then the linear layers'''\n",
643-
" x = self.conv2(x)\n",
644-
" x = self.relu(x)\n",
645-
" x = self.pool2(x)\n",
646-
"\n",
647-
" x = self.flatten(x)\n",
648-
" x = self.fc1(x)\n",
649-
" x = self.relu(x)\n",
650-
" x = self.fc2(x)\n",
651-
"\n",
652-
" '''NOTE: Remember that we do not need to define/execute softmax (self.softmax(x))\n",
653-
" in the forward pass since we will use CrossEntropyLoss for training,\n",
654-
" which operates directly on logits'''\n",
620+
" '''TODO'''\n",
655621
"\n",
656622
" return x\n",
657623
"\n",
@@ -697,8 +663,7 @@
697663
"optimizer = optim.SGD(cnn_model.parameters(), lr=1e-2)\n",
698664
"\n",
699665
"# TODO: instantiate the cross entropy loss function\n",
700-
"loss_function = nn.CrossEntropyLoss()\n",
701-
"# loss_function = # TODO\n",
666+
"loss_function = # TODO\n",
702667
"\n",
703668
"# Redefine trainloader with new batch size parameter (tweak as see fit if optimizing)\n",
704669
"trainset_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n",
@@ -735,13 +700,12 @@
735700
" images, labels = images.to(device), labels.to(device)\n",
736701
"\n",
737702
" # Forward pass\n",
738-
" #'''TODO: feed the images into the model and obtain the predictions'''\n",
739-
" logits = cnn_model(images)\n",
740-
" # logits = # TODO\n",
703+
" # TODO: feed the images into the model and obtain the predictions\n",
704+
" logits = # TODO\n",
705+
"\n",
706+
" # TODO: compute the categorical cross entropy loss using the predicted logits\n",
707+
" loss = # TODO\n",
741708
"\n",
742-
" #'''TODO: compute the categorical cross entropy loss\n",
743-
" loss = loss_function(logits, labels)\n",
744-
" # loss = # TODO\n",
745709
" # Get the loss and log it to comet and the loss_history record\n",
746710
" loss_value = loss.item()\n",
747711
" comet_model_2.log_metric(\"loss\", loss_value, step=idx)\n",
@@ -751,9 +715,9 @@
751715
" # Backpropagation/backward pass\n",
752716
" '''TODO: Compute gradients for all model parameters and propagate backwads\n",
753717
" to update model parameters. remember to reset your optimizer!'''\n",
754-
" optimizer.zero_grad()\n",
755-
" loss.backward()\n",
756-
" optimizer.step()\n",
718+
" # TODO: reset optimizer\n",
719+
" # TODO: compute gradients\n",
720+
" # TODO: update model parameters\n",
757721
"\n",
758722
" # Get the prediction and tally metrics\n",
759723
" predicted = torch.argmax(logits, dim=1)\n",
@@ -788,9 +752,7 @@
788752
"outputs": [],
789753
"source": [
790754
"'''TODO: Evaluate the CNN model!'''\n",
791-
"\n",
792-
"test_loss, test_acc = evaluate(cnn_model, trainset_loader, loss_function)\n",
793-
"# test_loss, test_acc = # TODO\n",
755+
"test_loss, test_acc = evaluate('''TODO''')\n",
794756
"\n",
795757
"print('Test accuracy:', test_acc)"
796758
]
@@ -850,7 +812,7 @@
850812
},
851813
"outputs": [],
852814
"source": [
853-
"predictions_test_image"
815+
"print(predictions_test_image)"
854816
]
855817
},
856818
{
@@ -875,8 +837,7 @@
875837
"'''TODO: identify the digit with the highest likelihood prediction for the first\n",
876838
" image in the test dataset. '''\n",
877839
"predictions_value = predictions_test_image.cpu().detach().numpy() #.cpu() to copy tensor to memory first\n",
878-
"prediction = np.argmax(predictions_value)\n",
879-
"# prediction = # TODO\n",
840+
"prediction = # TODO\n",
880841
"print(prediction)"
881842
]
882843
},
@@ -935,7 +896,7 @@
935896
" probabilities = torch.nn.functional.softmax(outputs, dim=1)\n",
936897
"\n",
937898
" # Get predicted classes\n",
938-
" predicted = torch.argmax(outputs, dim=1)\n",
899+
" predicted = torch.argmax(probabilities, dim=1)\n",
939900
"\n",
940901
" all_predictions.append(probabilities)\n",
941902
" all_labels.append(labels)\n",
@@ -1018,7 +979,7 @@
1018979
"collapsed_sections": [
1019980
"Xmf_JRJa_N8C"
1020981
],
1021-
"name": "PT_Part1_MNIST_Solution.ipynb",
982+
"name": "PT_Part1_MNIST.ipynb",
1022983
"provenance": []
1023984
},
1024985
"kernelspec": {

0 commit comments

Comments
 (0)