You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: Model/data_preprocess.ipynb
+10-59Lines changed: 10 additions & 59 deletions
Original file line number
Diff line number
Diff line change
@@ -14,98 +14,55 @@
14
14
"id": "3b0a337f",
15
15
"metadata": {},
16
16
"outputs": [],
17
-
"source": [
18
-
"import numpy as np\n",
19
-
"import cv2 as cv\n",
20
-
"import random\n",
21
-
"import os\n",
22
-
"import matplotlib.pyplot as plt\n",
23
-
"import pickle\n",
24
-
"%matplotlib inline"
25
-
]
17
+
"source": "# Import necessary libraries\nimport numpy as np # For numerical operations and array handling\nimport cv2 as cv # OpenCV for image processing\nimport random # For shuffling data\nimport os # For file and directory operations\nimport matplotlib.pyplot as plt # For data visualization\nimport pickle # For saving processed data to disk\n%matplotlib inline"
26
18
},
27
19
{
28
20
"cell_type": "code",
29
21
"execution_count": null,
30
22
"id": "850322e9",
31
23
"metadata": {},
32
24
"outputs": [],
33
-
"source": [
34
-
"DIRECTORY = r'C:\\Users\\Arjun M S\\Desktop\\build-from-home\\dataset'\n",
35
-
"# a raw string(r'...') is used to treat backslash(\\) as a normal character\n",
36
-
"CATEGORIES = ['mammooty','mohanlal','random']"
37
-
]
25
+
"source": "# Define the dataset directory path\nDIRECTORY = r'C:\\Users\\Arjun M S\\Desktop\\build-from-home\\dataset'\n# A raw string (r'...') is used to treat backslash (\\) as a normal character\n\n# Define the three classes for classification\nCATEGORIES = ['mammooty','mohanlal','random']"
"source": "# Set the target image size for model input\nIMG_SIZE = 224\n\n# Initialize list to store image data and labels\ndata = []\n\n# Process and label all images from the dataset\nfor category in CATEGORIES:\n # Build path to category folder\n folder = os.path.join(DIRECTORY, category)\n\n # Iterate through all images in the category folder\n for img in os.listdir(folder):\n # Build full path to image file\n img_path = os.path.join(folder, img)\n\n # Assign numeric label based on category index (0: mammooty, 1: mohanlal, 2: random)\n label = CATEGORIES.index(category)\n\n # Read image in grayscale mode\n img_arr = cv.imread(img_path, cv.IMREAD_GRAYSCALE)\n\n # Resize image to standard size (224x224)\n img_arr = cv.resize(img_arr,(IMG_SIZE,IMG_SIZE))\n\n # Append [image, label] pair to data list\n data.append([img_arr,label])"
61
34
},
62
35
{
63
36
"cell_type": "code",
64
37
"execution_count": null,
65
38
"id": "738cc5fa",
66
39
"metadata": {},
67
40
"outputs": [],
68
-
"source": [
69
-
"# to shuffle the data\n",
70
-
"random.shuffle(data)"
71
-
]
41
+
"source": "# Shuffle the data to randomize the order\n# This ensures the model doesn't learn based on the order of data\nrandom.shuffle(data)"
72
42
},
73
43
{
74
44
"cell_type": "code",
75
45
"execution_count": null,
76
46
"id": "f56a39be",
77
47
"metadata": {},
78
48
"outputs": [],
79
-
"source": [
80
-
"X = []\n",
81
-
"y = []\n",
82
-
"\n",
83
-
"for features, labels in data:\n",
84
-
" X.append(features)\n",
85
-
" y.append(labels)"
86
-
]
49
+
"source": "# Separate features (X) and labels (y)\nX = [] # Will store image arrays (features)\ny = [] # Will store corresponding labels\n\n# Extract features and labels from the data list\nfor features, labels in data:\n X.append(features)\n y.append(labels)"
87
50
},
88
51
{
89
52
"cell_type": "code",
90
53
"execution_count": null,
91
54
"id": "1a78a065",
92
55
"metadata": {},
93
56
"outputs": [],
94
-
"source": [
95
-
"# changed X & y into arrays and stored it in respective variables\n",
96
-
"X = np.array(X)\n",
97
-
"y = np.array(y)"
98
-
]
57
+
"source": "# Convert lists to NumPy arrays for efficient computation\n# X contains all image data, y contains all corresponding labels\nX = np.array(X)\ny = np.array(y)"
99
58
},
100
59
{
101
60
"cell_type": "code",
102
61
"execution_count": null,
103
62
"id": "093080fa",
104
63
"metadata": {},
105
64
"outputs": [],
106
-
"source": [
107
-
"X"
108
-
]
65
+
"source": "# Display the features array\nX"
109
66
},
110
67
{
111
68
"cell_type": "code",
@@ -115,9 +72,7 @@
115
72
"scrolled": true
116
73
},
117
74
"outputs": [],
118
-
"source": [
119
-
"y"
120
-
]
75
+
"source": "# Display the labels array\ny"
121
76
},
122
77
{
123
78
"cell_type": "code",
@@ -127,11 +82,7 @@
127
82
"scrolled": true
128
83
},
129
84
"outputs": [],
130
-
"source": [
131
-
"# storing the data in a pickle file\n",
132
-
"X = pickle.dump(X,open('X.pkl', 'wb'))\n",
133
-
"y = pickle.dump(y,open('y.pkl', 'wb'))\n"
134
-
]
85
+
"source": "# Save the preprocessed data to pickle files for later use\n# Pickle files preserve the NumPy array format and can be loaded quickly\nX = pickle.dump(X,open('X.pkl', 'wb')) # Save features\ny = pickle.dump(y,open('y.pkl', 'wb')) # Save labels"
0 commit comments