1+ # -*- coding: utf-8 -*-
2+ """Salary-prediction.ipynb
3+
4+ Automatically generated by Colab.
5+
6+ Original file is located at
7+ https://colab.research.google.com/drive/1l22jFuwbNijASJOnbx3dE2q-8LSlvGaM
8+ """
9+
# NOTE: `!pip install pandas` is IPython/Colab shell magic and is a SyntaxError
# when this file is run as a plain Python script. Install the dependency from
# a shell instead:
#     pip install pandas
import pandas as pd

# Load the datasets.
# The training frame is read from Placement_Train.csv and the test frame from
# Placement_Test.csv, so each variable holds the data its name describes.
Placement_Train = pd.read_csv("/content/Placement_Train.csv")
placement_test = pd.read_csv("/content/Placement_Test.csv")
# NOTE(review): "Plcement" is kept verbatim -- confirm it matches the uploaded file name.
placement_sample_submission = pd.read_csv("/content/Plcement_Sample_Submission.csv")
17+
18+ # Import necessary libraries
19+ import pandas as pd
20+ from sklearn .model_selection import train_test_split
21+ from sklearn .ensemble import RandomForestRegressor
22+ from sklearn .metrics import mean_squared_error
23+ from sklearn .preprocessing import LabelEncoder
24+
# Load the datasets used by the rest of the script.
placement_train = pd.read_csv("/content/Placement_Train.csv")
placement_test = pd.read_csv("/content/Placement_Test.csv")
placement_sample_submission = pd.read_csv("/content/Plcement_Sample_Submission.csv")

# Display basic information about the training dataset.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
placement_train.info()
32+
# Data Preprocessing
# Encode categorical variables as integer codes, one column at a time.
label_encoder = LabelEncoder()
categorical_cols = ['gender', 'ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'workex', 'specialisation']
for col in categorical_cols:
    # Fit on the values of both frames so a category that only occurs in the
    # test data does not make transform() raise ValueError. The encoder numbers
    # its sorted unique classes, so the codes are the same as fitting on train
    # alone whenever the test frame contains no unseen categories.
    label_encoder.fit(
        pd.concat([placement_train[col], placement_test[col]], ignore_index=True)
    )
    placement_train[col] = label_encoder.transform(placement_train[col])
    placement_test[col] = label_encoder.transform(placement_test[col])
40+
# Split the training frame into the target column and the predictor columns.
y = placement_train['Annual_salary']
X = placement_train.drop(columns=['Annual_salary'])

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest regressor on the training portion.
# fit() returns the estimator itself, so construction and fitting are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out rows and report the mean squared error.
y_pred = model.predict(X_valid)
mse = mean_squared_error(y_valid, y_pred)
print(f'Mean Squared Error on Validation Set: { mse } ')
58+
# Run the fitted model over the test frame.
test_predictions = model.predict(placement_test)

# Wrap the predictions in a single-column frame and write it to disk without
# the row index.
submission = pd.DataFrame(data={'Annual_salary': test_predictions})
submission.to_csv(path_or_buf='placement_submission.csv', index=False)
67+
# Create a DataFrame with SR_no and the predicted Annual_salary.
# `test_predictions` already holds model.predict(placement_test) from the
# prediction step above; neither the model nor the test frame has changed since,
# so the values are reused instead of running the forest a second time.
results = pd.DataFrame({
    'SR_no': placement_test['SR_no'],
    'Predicted_Annual_salary': test_predictions,
})

# Display the results
print(results)

# Rename the prediction column to the name used in the output file.
results = results.rename(columns={'Predicted_Annual_salary': 'Annual_salary'})

# Display the updated DataFrame
print(results)

# Write the file once, after the rename: an earlier write under the old column
# name would only be overwritten by this one.
results.to_csv('predicted_salary_results.csv', index=False)
0 commit comments