Commit 54551c6

Update Boxoffice.ipynb
1 parent 6dc615c commit 54551c6

File tree

1 file changed: +40 -16 lines changed

Algorithms and Deep Learning Models/Boxoffice/Boxoffice.ipynb

Lines changed: 40 additions & 16 deletions
@@ -6,6 +6,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Import necessary libraries\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "import matplotlib.pyplot as plt\n",
@@ -22,7 +23,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df=pd.read_csv(\"D:/Documents/Data Sets/film.csv\")"
+"# Load the dataset\n",
+"df=pd.read_csv(\"D:/Documents/Data Sets/movie_dataset.csv\")"
 ]
 },
 {
@@ -31,6 +33,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Display the first 5 rows of the dataset\n",
 "df.head(5)"
 ]
 },
@@ -40,6 +43,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Get the shape of the dataset\n",
 "df.shape"
 ]
 },
@@ -49,6 +53,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Check for missing values in the dataset\n",
 "df.isnull().sum()"
 ]
 },
@@ -58,6 +63,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Drop rows with missing values\n",
 "df.dropna(inplace=True)"
 ]
 },
@@ -67,6 +73,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Verify that there are no more missing values\n",
 "df.isnull().sum()"
 ]
 },
@@ -76,6 +83,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Display information about the dataset\n",
 "df.info()"
 ]
 },
@@ -85,7 +93,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"cor=df['Budget'].corr(df['Revenue'])\n",
+"# Calculate and display correlation between budget and revenue\n",
+"cor=df['budget'].corr(df['revenue'])\n",
 "cor"
 ]
 },
@@ -95,18 +104,18 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Encode categorical variables using Label Encoding\n",
 "lr=preprocessing.LabelEncoder()\n",
-"df['Title']=lr.fit_transform(df['Title'])\n",
-"df['Original Title']=lr.fit_transform(df['Original Title'])\n",
-"df['Original Language']=lr.fit_transform(df['Original Language'])\n",
-"df['Status']=lr.fit_transform(df['Status'])\n",
-"df['Spoken Languages']=lr.fit_transform(df['Spoken Languages'])\n",
-"df['Production Countries']=lr.fit_transform(df['Production Countries'])\n",
-"df['Production Companies']=lr.fit_transform(df['Production Companies'])\n",
-"df['Genres']=lr.fit_transform(df['Genres'])\n",
-"df['Overview']=lr.fit_transform(df['Overview'])\n",
-"df['Release Date']=lr.fit_transform(df['Release Date'])\n",
-"df['Adult']=lr.fit_transform(df['Adult'])\n"
+"df['title']=lr.fit_transform(df['title'])\n",
+"df['original_title']=lr.fit_transform(df['original_title'])\n",
+"df['original_language']=lr.fit_transform(df['original_language'])\n",
+"df['status']=lr.fit_transform(df['status'])\n",
+"df['spoken_languages']=lr.fit_transform(df['spoken_languages'])\n",
+"df['production_countries']=lr.fit_transform(df['production_countries'])\n",
+"df['production_companies']=lr.fit_transform(df['production_companies'])\n",
+"df['genres']=lr.fit_transform(df['genres'])\n",
+"df['overview']=lr.fit_transform(df['overview'])\n",
+"df['release_date']=lr.fit_transform(df['release_date'])\n"
 ]
 },
 {
@@ -115,6 +124,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Display information about the dataset after encoding\n",
 "df.info()"
 ]
 },
@@ -124,7 +134,15 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sns.heatmap(data=df)"
+"# Create and display a heatmap of feature correlations\n",
+"correlation_matrix = df.select_dtypes(include=[np.number]).corr()\n",
+"plt.figure(figsize=(14, 10))\n",
+"sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', square=True, cbar_kws={\"shrink\": .8})\n",
+"plt.title('Heatmap of Feature Correlations', fontsize=20)\n",
+"plt.xticks(rotation=45, ha='right')\n",
+"plt.yticks(rotation=0)\n",
+"plt.tight_layout()\n",
+"plt.show()"
 ]
 },
 {
@@ -133,8 +151,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"X=df[['Budget','Popularity','Runtime']]\n",
-"Y=df['Revenue']\n"
+"# Define features and target variable for the model\n",
+"X=df[['budget','popularity','runtime']]\n",
+"Y=df['revenue']\n"
 ]
 },
 {
@@ -143,6 +162,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Split the data into training and testing sets\n",
 "x_train, x_test, y_train, y_test=train_test_split(X,Y, test_size=0.4)"
 ]
 },
@@ -152,6 +172,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Create a Linear Regression model\n",
 "lr=LinearRegression()"
 ]
 },
@@ -161,6 +182,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Fit the model to the training data\n",
 "lr.fit(x_train, y_train)"
 ]
 },
@@ -170,6 +192,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Make predictions on the testing set\n",
 "pred=lr.predict(x_test)"
 ]
 },
@@ -179,6 +202,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# Evaluate the model using Mean Absolute Error\n",
 "print(metrics.mean_absolute_error(y_test, pred))"
 ]
 }
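
Taken together, the changed cells amount to the pipeline below. This is a minimal consolidated sketch, not the notebook itself: the import cell in the diff shows only numpy, pandas and matplotlib, so the seaborn and scikit-learn imports (preprocessing, train_test_split, LinearRegression, metrics) are assumed here, and the names le/model are used in place of the notebook's reuse of lr for both the encoder and the regressor.

# Consolidated sketch of the updated Boxoffice.ipynb pipeline.
# Assumes the dataset path and column names shown in the diff; the
# seaborn/scikit-learn imports are not visible in this commit and are assumed.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing, metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset and drop rows with missing values
df = pd.read_csv("D:/Documents/Data Sets/movie_dataset.csv")
df.dropna(inplace=True)

# Correlation between budget and revenue
print(df['budget'].corr(df['revenue']))

# Label-encode the categorical columns used in the notebook
le = preprocessing.LabelEncoder()
for col in ['title', 'original_title', 'original_language', 'status',
            'spoken_languages', 'production_countries',
            'production_companies', 'genres', 'overview', 'release_date']:
    df[col] = le.fit_transform(df[col])

# Heatmap of correlations over numeric columns only
correlation_matrix = df.select_dtypes(include=[np.number]).corr()
plt.figure(figsize=(14, 10))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm',
            square=True, cbar_kws={"shrink": .8})
plt.title('Heatmap of Feature Correlations', fontsize=20)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# Train/test split, linear regression, and MAE evaluation
X = df[['budget', 'popularity', 'runtime']]
Y = df['revenue']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.4)
model = LinearRegression()
model.fit(x_train, y_train)
pred = model.predict(x_test)
print(metrics.mean_absolute_error(y_test, pred))

Besides the lowercase column names and the new CSV filename, the main behavioural change in this commit is the heatmap cell: instead of passing the raw DataFrame to sns.heatmap, it now computes a correlation matrix restricted to numeric columns and plots that, with annotations and axis formatting.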
