Skip to content

Commit 738e358

Browse files
authored
Add files via upload
1 parent f4a5426 commit 738e358

File tree

1 file changed

+86
-0
lines changed

1 file changed

+86
-0
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
"""Salary-prediction.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1l22jFuwbNijASJOnbx3dE2q-8LSlvGaM
"""

# Notebook-only shell magic: it is not valid Python syntax in a plain .py
# file, so it is kept as a comment. Run it by hand if pandas is missing.
# !pip install pandas

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the datasets once; each variable is read from the file it is named after.
placement_train = pd.read_csv("/content/Placement_Train.csv")
placement_test = pd.read_csv("/content/Placement_Test.csv")
# Read for parity with the notebook; it is not referenced again in this script.
placement_sample_submission = pd.read_csv("/content/Plcement_Sample_Submission.csv")

# Display basic information about the training dataset.
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (that would emit a stray "None" line).
placement_train.info()

# Data preprocessing: encode categorical variables as integer codes.
label_encoder = LabelEncoder()
categorical_cols = ['gender', 'ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'workex', 'specialisation']
for col in categorical_cols:
    # Fit on the train and test values together: LabelEncoder.transform raises
    # ValueError on a label it was not fitted on, so a category that appears
    # only in the test file would otherwise abort the run. When the test file
    # has no unseen categories the resulting codes equal a train-only fit.
    label_encoder.fit(
        pd.concat([placement_train[col], placement_test[col]], ignore_index=True)
    )
    placement_train[col] = label_encoder.transform(placement_train[col])
    placement_test[col] = label_encoder.transform(placement_test[col])

# Separate features and target variable.
# NOTE(review): every remaining column is used as a feature, including
# 'SR_no', which looks like a row identifier -- confirm that is intended.
X = placement_train.drop(['Annual_salary'], axis=1)
y = placement_train['Annual_salary']

# Hold out 20% of the training rows for validation.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the model (Random Forest Regressor).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the validation set.
y_pred = model.predict(X_valid)
mse = mean_squared_error(y_valid, y_pred)
print(f'Mean Squared Error on Validation Set: {mse}')

# Predict annual salary on the test set. The test frame is restricted to the
# training feature columns, in training order, so extra or reordered columns
# in the test file do not break prediction.
X_test = placement_test[X.columns]
test_predictions = model.predict(X_test)

# Save the bare predictions.
submission = pd.DataFrame({'Annual_salary': test_predictions})
submission.to_csv('placement_submission.csv', index=False)

# Save the predictions keyed by SR_no.
results = pd.DataFrame({'SR_no': placement_test['SR_no'], 'Annual_salary': test_predictions})
print(results)
results.to_csv('predicted_salary_results.csv', index=False)

0 commit comments

Comments
 (0)