Skip to content

Commit f456afb

Browse files
committed
added more comments for clarity
1 parent 2320571 commit f456afb

File tree

1 file changed

+10
-8
lines changed
  • Prediction Models/Handwritten Digit Recognition

1 file changed

+10
-8
lines changed

Prediction Models/Handwritten Digit Recognition/svm.ipynb

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,8 @@
227227
],
228228
"source": [
229229
"df_train = pd.read_csv(\"train.csv\")\n",
230-
"df_train.head()"
230+
"df_train.head()\n",
231+
"# Each row represents one image; its 784 feature columns each hold the value of a single pixel."
231232
]
232233
},
233234
{
@@ -258,7 +259,7 @@
258259
}
259260
],
260261
"source": [
261-
"# it is clear that the dataset is not linearly separable and so, we can't employ a linear kernel and must transform input data to higher-dimensional space. \n",
262+
"# It is clear that the dataset is not linearly separable, so we can't employ a linear kernel and must transform the input data to a higher-dimensional space.\n",
262263
"# This is done using kernel trick when we invoke SVC() from the sklearn library with a non-linear kernel\n",
263264
"sns.pairplot(df_train, x_vars=['pixel0', 'pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5', 'pixel6'], y_vars=['pixel7', 'pixel8', 'pixel9', 'pixel10', 'pixel11', 'pixel12'], hue='label')"
264265
]
@@ -270,7 +271,7 @@
270271
"metadata": {},
271272
"outputs": [],
272273
"source": [
273-
"# segregating the dataset into features and label.\n",
274+
"# Segregating the dataset into features and label.\n",
274275
"X = df_train.drop('label', axis=1)\n",
275276
"X = X.values\n",
276277
"y = df_train['label']\n",
@@ -295,7 +296,7 @@
295296
"metadata": {},
296297
"outputs": [],
297298
"source": [
298-
"# normalizing the data is important when we use svm to avoid giving unintentional priority to a feature owing to increased avg value\n",
299+
"# Normalizing the data is important when we use an SVM, to avoid unintentionally prioritizing a feature just because its average value is larger\n",
299300
"scaler = StandardScaler()\n",
300301
"X_train = scaler.fit_transform(X_train)\n",
301302
"X_test = scaler.transform(X_test)"
@@ -726,7 +727,7 @@
726727
}
727728
],
728729
"source": [
729-
"# using default values for c, gamma and the default 'rbf' kernel\n",
730+
"# Using default values for C, gamma and the default 'rbf' kernel\n",
730731
"model = SVC()\n",
731732
"model.fit(X_train, y_train)"
732733
]
@@ -859,11 +860,12 @@
859860
}
860861
],
861862
"source": [
862-
"df_test = pd.read_csv(\"test.csv\", nrows=15) # for demonstrative purposes, limiting the rows read to speed up the process\n",
863+
"df_test = pd.read_csv(\"test.csv\", nrows=15) # For demonstrative purposes, limiting the rows read to speed up the process\n",
863864
"X_testing = df_test.values\n",
864865
"X_testing = scaler.transform(X_testing)\n",
865866
"y_test_pred = model.predict(X_testing) \n",
866-
"random_arr = [random.randint(0, len(df_test)-1) for _ in range(10)] # visualizing 10 test samples, manually checking the accuracy of prediction\n",
867+
"random_arr = [random.randint(0, len(df_test)-1) for _ in range(10)] # Visualizing 10 test samples, manually checking the accuracy of prediction\n",
868+
"# We reshape each 784-column row into a 28x28 grid to visualize the digit\n",
867869
"for i in random_arr: \n",
868870
" image = np.reshape(X_testing[i], (28, 28))\n",
869871
" plt.title(f'Predicted: {y_test_pred[i]}')\n",
@@ -878,7 +880,7 @@
878880
"metadata": {},
879881
"outputs": [],
880882
"source": [
881-
"# use GridSearchCV to get the best parameters for our svm model that lead to increased accuracy\n",
883+
"# Use GridSearchCV to get the best parameters for our svm model that lead to increased accuracy\n",
882884
"\n",
883885
"grid = {\n",
884886
" 'C': [0.1, 1, 10, 100, 1000], \n",

0 commit comments

Comments
 (0)