diff --git a/Predicting Energy Efficiency of Buildings/Predicting Energy Efficiency of Buildings.ipynb b/Predicting Energy Efficiency of Buildings/Predicting Energy Efficiency of Buildings.ipynb new file mode 100644 index 0000000..e2fb302 --- /dev/null +++ b/Predicting Energy Efficiency of Buildings/Predicting Energy Efficiency of Buildings.ipynb @@ -0,0 +1,1683 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "504041c1", + "metadata": {}, + "source": [ + "# #Stage B Data Science Internship" + ] + }, + { + "cell_type": "markdown", + "id": "73974946", + "metadata": {}, + "source": [ + "### Graded Quiz Sol'n " + ] + }, + { + "cell_type": "markdown", + "id": "b6489cf1", + "metadata": {}, + "source": [ + "### Yamini Vijaywargiya \n", + "\n", + "#### Machine Learning: Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f7b64448", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateApplianceslightsT1RH_1T2RH_2T3RH_3T4...T9RH_9T_outPress_mm_hgRH_outWindspeedVisibilityTdewpointrv1rv2
02016-01-11 17:00:00603019.8947.59666719.244.79000019.7944.73000019.000000...17.03333345.536.600000733.592.07.00000063.0000005.313.27543313.275433
12016-01-11 17:10:00603019.8946.69333319.244.72250019.7944.79000019.000000...17.06666745.566.483333733.692.06.66666759.1666675.218.60619518.606195
22016-01-11 17:20:00503019.8946.30000019.244.62666719.7944.93333318.926667...17.00000045.506.366667733.792.06.33333355.3333335.128.64266828.642668
32016-01-11 17:30:00504019.8946.06666719.244.59000019.7945.00000018.890000...17.00000045.406.250000733.892.06.00000051.5000005.045.41038945.410389
42016-01-11 17:40:00604019.8946.33333319.244.53000019.7945.00000018.890000...17.00000045.406.133333733.992.05.66666747.6666674.910.08409710.084097
\n", + "

5 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " date Appliances lights T1 RH_1 T2 RH_2 \\\n", + "0 2016-01-11 17:00:00 60 30 19.89 47.596667 19.2 44.790000 \n", + "1 2016-01-11 17:10:00 60 30 19.89 46.693333 19.2 44.722500 \n", + "2 2016-01-11 17:20:00 50 30 19.89 46.300000 19.2 44.626667 \n", + "3 2016-01-11 17:30:00 50 40 19.89 46.066667 19.2 44.590000 \n", + "4 2016-01-11 17:40:00 60 40 19.89 46.333333 19.2 44.530000 \n", + "\n", + " T3 RH_3 T4 ... T9 RH_9 T_out Press_mm_hg \\\n", + "0 19.79 44.730000 19.000000 ... 17.033333 45.53 6.600000 733.5 \n", + "1 19.79 44.790000 19.000000 ... 17.066667 45.56 6.483333 733.6 \n", + "2 19.79 44.933333 18.926667 ... 17.000000 45.50 6.366667 733.7 \n", + "3 19.79 45.000000 18.890000 ... 17.000000 45.40 6.250000 733.8 \n", + "4 19.79 45.000000 18.890000 ... 17.000000 45.40 6.133333 733.9 \n", + "\n", + " RH_out Windspeed Visibility Tdewpoint rv1 rv2 \n", + "0 92.0 7.000000 63.000000 5.3 13.275433 13.275433 \n", + "1 92.0 6.666667 59.166667 5.2 18.606195 18.606195 \n", + "2 92.0 6.333333 55.333333 5.1 28.642668 28.642668 \n", + "3 92.0 6.000000 51.500000 5.0 45.410389 45.410389 \n", + "4 92.0 5.666667 47.666667 4.9 10.084097 10.084097 \n", + "\n", + "[5 rows x 29 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'\n", + "df = pd.read_csv(url, error_bad_lines= False)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7a4f79af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateApplianceslightsT1RH_1T2RH_2T3RH_3T4...T9RH_9T_outPress_mm_hgRH_outWindspeedVisibilityTdewpointrv1rv2
count1973519735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.000000...19735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.00000019735.000000
unique19735NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
top2016-03-03 11:20:00NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freq1NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
meanNaN97.6949583.80187521.68657140.25973920.34121940.42042022.26761139.24250020.855335...19.48582841.5524017.411665755.52260279.7504184.03975238.3308343.76070724.98803324.988033
stdNaN102.5248917.9359881.6060663.9792992.1929744.0698132.0061113.2545762.042884...2.0147124.1514975.3174097.39944114.9010882.45122111.7947194.19464814.49663414.496634
minNaN10.0000000.00000016.79000027.02333316.10000020.46333317.20000028.76666715.100000...14.89000029.166667-5.000000729.30000024.0000000.0000001.000000-6.6000000.0053220.005322
25%NaN50.0000000.00000020.76000037.33333318.79000037.90000020.79000036.90000019.530000...18.00000038.5000003.666667750.93333370.3333332.00000029.0000000.90000012.49788912.497889
50%NaN60.0000000.00000021.60000039.65666720.00000040.50000022.10000038.53000020.666667...19.39000040.9000006.916667756.10000083.6666673.66666740.0000003.43333324.89765324.897653
75%NaN100.0000000.00000022.60000043.06666721.50000043.26000023.29000041.76000022.100000...20.60000044.33809510.408333760.93333391.6666675.50000040.0000006.56666737.58376937.583769
maxNaN1080.00000070.00000026.26000063.36000029.85666756.02666729.23600050.16333326.200000...24.50000053.32666726.100000772.300000100.00000014.00000066.00000015.50000049.99653049.996530
\n", + "

11 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " date Appliances lights T1 \\\n", + "count 19735 19735.000000 19735.000000 19735.000000 \n", + "unique 19735 NaN NaN NaN \n", + "top 2016-03-03 11:20:00 NaN NaN NaN \n", + "freq 1 NaN NaN NaN \n", + "mean NaN 97.694958 3.801875 21.686571 \n", + "std NaN 102.524891 7.935988 1.606066 \n", + "min NaN 10.000000 0.000000 16.790000 \n", + "25% NaN 50.000000 0.000000 20.760000 \n", + "50% NaN 60.000000 0.000000 21.600000 \n", + "75% NaN 100.000000 0.000000 22.600000 \n", + "max NaN 1080.000000 70.000000 26.260000 \n", + "\n", + " RH_1 T2 RH_2 T3 RH_3 \\\n", + "count 19735.000000 19735.000000 19735.000000 19735.000000 19735.000000 \n", + "unique NaN NaN NaN NaN NaN \n", + "top NaN NaN NaN NaN NaN \n", + "freq NaN NaN NaN NaN NaN \n", + "mean 40.259739 20.341219 40.420420 22.267611 39.242500 \n", + "std 3.979299 2.192974 4.069813 2.006111 3.254576 \n", + "min 27.023333 16.100000 20.463333 17.200000 28.766667 \n", + "25% 37.333333 18.790000 37.900000 20.790000 36.900000 \n", + "50% 39.656667 20.000000 40.500000 22.100000 38.530000 \n", + "75% 43.066667 21.500000 43.260000 23.290000 41.760000 \n", + "max 63.360000 29.856667 56.026667 29.236000 50.163333 \n", + "\n", + " T4 ... T9 RH_9 T_out \\\n", + "count 19735.000000 ... 19735.000000 19735.000000 19735.000000 \n", + "unique NaN ... NaN NaN NaN \n", + "top NaN ... NaN NaN NaN \n", + "freq NaN ... NaN NaN NaN \n", + "mean 20.855335 ... 19.485828 41.552401 7.411665 \n", + "std 2.042884 ... 2.014712 4.151497 5.317409 \n", + "min 15.100000 ... 14.890000 29.166667 -5.000000 \n", + "25% 19.530000 ... 18.000000 38.500000 3.666667 \n", + "50% 20.666667 ... 19.390000 40.900000 6.916667 \n", + "75% 22.100000 ... 20.600000 44.338095 10.408333 \n", + "max 26.200000 ... 
24.500000 53.326667 26.100000 \n", + "\n", + " Press_mm_hg RH_out Windspeed Visibility Tdewpoint \\\n", + "count 19735.000000 19735.000000 19735.000000 19735.000000 19735.000000 \n", + "unique NaN NaN NaN NaN NaN \n", + "top NaN NaN NaN NaN NaN \n", + "freq NaN NaN NaN NaN NaN \n", + "mean 755.522602 79.750418 4.039752 38.330834 3.760707 \n", + "std 7.399441 14.901088 2.451221 11.794719 4.194648 \n", + "min 729.300000 24.000000 0.000000 1.000000 -6.600000 \n", + "25% 750.933333 70.333333 2.000000 29.000000 0.900000 \n", + "50% 756.100000 83.666667 3.666667 40.000000 3.433333 \n", + "75% 760.933333 91.666667 5.500000 40.000000 6.566667 \n", + "max 772.300000 100.000000 14.000000 66.000000 15.500000 \n", + "\n", + " rv1 rv2 \n", + "count 19735.000000 19735.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 24.988033 24.988033 \n", + "std 14.496634 14.496634 \n", + "min 0.005322 0.005322 \n", + "25% 12.497889 12.497889 \n", + "50% 24.897653 24.897653 \n", + "75% 37.583769 37.583769 \n", + "max 49.996530 49.996530 \n", + "\n", + "[11 rows x 29 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include ='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b8b2a8e8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "date 0\n", + "Appliances 0\n", + "lights 0\n", + "T1 0\n", + "RH_1 0\n", + "T2 0\n", + "RH_2 0\n", + "T3 0\n", + "RH_3 0\n", + "T4 0\n", + "RH_4 0\n", + "T5 0\n", + "RH_5 0\n", + "T6 0\n", + "RH_6 0\n", + "T7 0\n", + "RH_7 0\n", + "T8 0\n", + "RH_8 0\n", + "T9 0\n", + "RH_9 0\n", + "T_out 0\n", + "Press_mm_hg 0\n", + "RH_out 0\n", + "Windspeed 0\n", + "Visibility 0\n", + "Tdewpoint 0\n", + "rv1 0\n", + "rv2 0\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 8, + "id": "24f62118", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "column_names = {'date': 'Date', 'Appliances': 'Appliances', 'lights':'Lights', 'T1':'Temp_Kitchen',\n", + " 'RH_1':'Humidity_Kitchen', 'T2':'Temp_LivingRoom', 'RH_2':'Humidity_LivingRoom', \n", + " 'T3':'Temp_LaundryRoom', 'RH_3':'Humidity_LaundryRoom', 'T4':'Temp_Office', \n", + " 'RH_4':'Humidity_Office', 'T5':'Temp_Bathroom', 'RH_5':'Humidity_Bathroom', \n", + " 'T6': 'Temp_Outside_Building', 'RH_6': 'Humidity_Outside_Building', \n", + " 'T7': 'Temp_IroningRoom', 'RH_7': 'Humidity_IroningRoom',\n", + " 'T8': 'Temp_TeenagerRoom', 'RH_8': 'Humidity_TeenagerRoom', \n", + " 'T9': 'Temp_ParentsRoom', 'RH_9': 'Humidity_ParentsRoom', 'T_out': 'Temp_Outside', \n", + " 'Press_mm_hg': 'Press_mm_hg', 'RH_out': 'Humidity_Outside', 'Windspeed': 'Windspeed', \n", + " 'Visibility': 'Visibility', 'Tdewpoint': 'T_Dewpoint', 'rv1': 'Random_Var1', 'rv2': 'Random_Var2'}\n", + "\n", + "df = df.rename(columns = column_names)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c994f4e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateAppliancesLightsTemp_KitchenHumidity_KitchenTemp_LivingRoomHumidity_LivingRoomTemp_LaundryRoomHumidity_LaundryRoomTemp_Office...Temp_ParentsRoomHumidity_ParentsRoomTemp_OutsidePress_mm_hgHumidity_OutsideWindspeedVisibilityT_DewpointRandom_Var1Random_Var2
02016-01-11 17:00:00603019.8947.59666719.244.79000019.7944.73000019.000000...17.03333345.536.600000733.592.07.00000063.0000005.313.27543313.275433
12016-01-11 17:10:00603019.8946.69333319.244.72250019.7944.79000019.000000...17.06666745.566.483333733.692.06.66666759.1666675.218.60619518.606195
22016-01-11 17:20:00503019.8946.30000019.244.62666719.7944.93333318.926667...17.00000045.506.366667733.792.06.33333355.3333335.128.64266828.642668
32016-01-11 17:30:00504019.8946.06666719.244.59000019.7945.00000018.890000...17.00000045.406.250000733.892.06.00000051.5000005.045.41038945.410389
42016-01-11 17:40:00604019.8946.33333319.244.53000019.7945.00000018.890000...17.00000045.406.133333733.992.05.66666747.6666674.910.08409710.084097
\n", + "

5 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " Date Appliances Lights Temp_Kitchen Humidity_Kitchen \\\n", + "0 2016-01-11 17:00:00 60 30 19.89 47.596667 \n", + "1 2016-01-11 17:10:00 60 30 19.89 46.693333 \n", + "2 2016-01-11 17:20:00 50 30 19.89 46.300000 \n", + "3 2016-01-11 17:30:00 50 40 19.89 46.066667 \n", + "4 2016-01-11 17:40:00 60 40 19.89 46.333333 \n", + "\n", + " Temp_LivingRoom Humidity_LivingRoom Temp_LaundryRoom \\\n", + "0 19.2 44.790000 19.79 \n", + "1 19.2 44.722500 19.79 \n", + "2 19.2 44.626667 19.79 \n", + "3 19.2 44.590000 19.79 \n", + "4 19.2 44.530000 19.79 \n", + "\n", + " Humidity_LaundryRoom Temp_Office ... Temp_ParentsRoom \\\n", + "0 44.730000 19.000000 ... 17.033333 \n", + "1 44.790000 19.000000 ... 17.066667 \n", + "2 44.933333 18.926667 ... 17.000000 \n", + "3 45.000000 18.890000 ... 17.000000 \n", + "4 45.000000 18.890000 ... 17.000000 \n", + "\n", + " Humidity_ParentsRoom Temp_Outside Press_mm_hg Humidity_Outside \\\n", + "0 45.53 6.600000 733.5 92.0 \n", + "1 45.56 6.483333 733.6 92.0 \n", + "2 45.50 6.366667 733.7 92.0 \n", + "3 45.40 6.250000 733.8 92.0 \n", + "4 45.40 6.133333 733.9 92.0 \n", + "\n", + " Windspeed Visibility T_Dewpoint Random_Var1 Random_Var2 \n", + "0 7.000000 63.000000 5.3 13.275433 13.275433 \n", + "1 6.666667 59.166667 5.2 18.606195 18.606195 \n", + "2 6.333333 55.333333 5.1 28.642668 28.642668 \n", + "3 6.000000 51.500000 5.0 45.410389 45.410389 \n", + "4 5.666667 47.666667 4.9 10.084097 10.084097 \n", + "\n", + "[5 rows x 29 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bd0de804", + "metadata": {}, + "outputs": [], + "source": [ + "# Dropping Columns\n", + "df.drop(['Date', 'Lights'], inplace=True, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f34809bc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppliancesTemp_KitchenHumidity_KitchenTemp_LivingRoomHumidity_LivingRoomTemp_LaundryRoomHumidity_LaundryRoomTemp_OfficeHumidity_OfficeTemp_Bathroom...Temp_ParentsRoomHumidity_ParentsRoomTemp_OutsidePress_mm_hgHumidity_OutsideWindspeedVisibilityT_DewpointRandom_Var1Random_Var2
06019.8947.59666719.244.79000019.7944.73000019.00000045.56666717.166667...17.03333345.536.600000733.592.07.00000063.0000005.313.27543313.275433
16019.8946.69333319.244.72250019.7944.79000019.00000045.99250017.166667...17.06666745.566.483333733.692.06.66666759.1666675.218.60619518.606195
25019.8946.30000019.244.62666719.7944.93333318.92666745.89000017.166667...17.00000045.506.366667733.792.06.33333355.3333335.128.64266828.642668
35019.8946.06666719.244.59000019.7945.00000018.89000045.72333317.166667...17.00000045.406.250000733.892.06.00000051.5000005.045.41038945.410389
46019.8946.33333319.244.53000019.7945.00000018.89000045.53000017.200000...17.00000045.406.133333733.992.05.66666747.6666674.910.08409710.084097
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " Appliances Temp_Kitchen Humidity_Kitchen Temp_LivingRoom \\\n", + "0 60 19.89 47.596667 19.2 \n", + "1 60 19.89 46.693333 19.2 \n", + "2 50 19.89 46.300000 19.2 \n", + "3 50 19.89 46.066667 19.2 \n", + "4 60 19.89 46.333333 19.2 \n", + "\n", + " Humidity_LivingRoom Temp_LaundryRoom Humidity_LaundryRoom Temp_Office \\\n", + "0 44.790000 19.79 44.730000 19.000000 \n", + "1 44.722500 19.79 44.790000 19.000000 \n", + "2 44.626667 19.79 44.933333 18.926667 \n", + "3 44.590000 19.79 45.000000 18.890000 \n", + "4 44.530000 19.79 45.000000 18.890000 \n", + "\n", + " Humidity_Office Temp_Bathroom ... Temp_ParentsRoom \\\n", + "0 45.566667 17.166667 ... 17.033333 \n", + "1 45.992500 17.166667 ... 17.066667 \n", + "2 45.890000 17.166667 ... 17.000000 \n", + "3 45.723333 17.166667 ... 17.000000 \n", + "4 45.530000 17.200000 ... 17.000000 \n", + "\n", + " Humidity_ParentsRoom Temp_Outside Press_mm_hg Humidity_Outside \\\n", + "0 45.53 6.600000 733.5 92.0 \n", + "1 45.56 6.483333 733.6 92.0 \n", + "2 45.50 6.366667 733.7 92.0 \n", + "3 45.40 6.250000 733.8 92.0 \n", + "4 45.40 6.133333 733.9 92.0 \n", + "\n", + " Windspeed Visibility T_Dewpoint Random_Var1 Random_Var2 \n", + "0 7.000000 63.000000 5.3 13.275433 13.275433 \n", + "1 6.666667 59.166667 5.2 18.606195 18.606195 \n", + "2 6.333333 55.333333 5.1 28.642668 28.642668 \n", + "3 6.000000 51.500000 5.0 45.410389 45.410389 \n", + "4 5.666667 47.666667 4.9 10.084097 10.084097 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5249c0c4", + "metadata": {}, + "outputs": [], + "source": [ + "# Data Normalization\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "scaler = MinMaxScaler()\n", + "normalised_df = pd.DataFrame(scaler.fit_transform(df), columns = df.columns)\n", + "features_df = normalised_df.drop(columns 
= ['Appliances'])\n", + "appliances_target = normalised_df['Appliances']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4bb31164", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Temp_KitchenHumidity_KitchenTemp_LivingRoomHumidity_LivingRoomTemp_LaundryRoomHumidity_LaundryRoomTemp_OfficeHumidity_OfficeTemp_BathroomHumidity_Bathroom...Temp_ParentsRoomHumidity_ParentsRoomTemp_OutsidePress_mm_hgHumidity_OutsideWindspeedVisibilityT_DewpointRandom_Var1Random_Var2
00.327350.5661870.2253450.6840380.2151880.7460660.3513510.7642620.1755060.381691...0.2230320.6772900.3729900.0976740.8947370.5000000.9538460.5384620.2654490.265449
10.327350.5413260.2253450.6821400.2151880.7488710.3513510.7824370.1755060.381691...0.2265000.6785320.3692390.1000000.8947370.4761900.8948720.5339370.3720830.372083
20.327350.5305020.2253450.6794450.2151880.7555690.3447450.7780620.1755060.380037...0.2195630.6760490.3654880.1023260.8947370.4523810.8358970.5294120.5728480.572848
30.327350.5240800.2253450.6784140.2151880.7586850.3414410.7709490.1755060.380037...0.2195630.6719090.3617360.1046510.8947370.4285710.7769230.5248870.9082610.908261
40.327350.5314190.2253450.6767270.2151880.7586850.3414410.7626970.1786910.380037...0.2195630.6719090.3579850.1069770.8947370.4047620.7179490.5203620.2016110.201611
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " Temp_Kitchen Humidity_Kitchen Temp_LivingRoom Humidity_LivingRoom \\\n", + "0 0.32735 0.566187 0.225345 0.684038 \n", + "1 0.32735 0.541326 0.225345 0.682140 \n", + "2 0.32735 0.530502 0.225345 0.679445 \n", + "3 0.32735 0.524080 0.225345 0.678414 \n", + "4 0.32735 0.531419 0.225345 0.676727 \n", + "\n", + " Temp_LaundryRoom Humidity_LaundryRoom Temp_Office Humidity_Office \\\n", + "0 0.215188 0.746066 0.351351 0.764262 \n", + "1 0.215188 0.748871 0.351351 0.782437 \n", + "2 0.215188 0.755569 0.344745 0.778062 \n", + "3 0.215188 0.758685 0.341441 0.770949 \n", + "4 0.215188 0.758685 0.341441 0.762697 \n", + "\n", + " Temp_Bathroom Humidity_Bathroom ... Temp_ParentsRoom \\\n", + "0 0.175506 0.381691 ... 0.223032 \n", + "1 0.175506 0.381691 ... 0.226500 \n", + "2 0.175506 0.380037 ... 0.219563 \n", + "3 0.175506 0.380037 ... 0.219563 \n", + "4 0.178691 0.380037 ... 0.219563 \n", + "\n", + " Humidity_ParentsRoom Temp_Outside Press_mm_hg Humidity_Outside \\\n", + "0 0.677290 0.372990 0.097674 0.894737 \n", + "1 0.678532 0.369239 0.100000 0.894737 \n", + "2 0.676049 0.365488 0.102326 0.894737 \n", + "3 0.671909 0.361736 0.104651 0.894737 \n", + "4 0.671909 0.357985 0.106977 0.894737 \n", + "\n", + " Windspeed Visibility T_Dewpoint Random_Var1 Random_Var2 \n", + "0 0.500000 0.953846 0.538462 0.265449 0.265449 \n", + "1 0.476190 0.894872 0.533937 0.372083 0.372083 \n", + "2 0.452381 0.835897 0.529412 0.572848 0.572848 \n", + "3 0.428571 0.776923 0.524887 0.908261 0.908261 \n", + "4 0.404762 0.717949 0.520362 0.201611 0.201611 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "features_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d4ac60a9", + "metadata": {}, + "outputs": [], + "source": [ + "# spliting data into training and test set i 70-30 with 42 random_state.\n", + "from sklearn.model_selection import 
train_test_split\n", + "x_train, x_test, y_train, y_test = train_test_split(features_df, \n", + " appliances_target, test_size=0.3, \n", + " random_state=42)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9c38ba2a", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "linear_model = LinearRegression()\n", + "linear_model.fit(x_train, y_train)\n", + "predicted_values = linear_model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "e83416a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "45.35" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q.14\n", + "import numpy as np\n", + "rss = np.sum(np.square(y_test - predicted_values))\n", + "round(rss, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "2ab99def", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.088" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q.15\n", + "from sklearn.metrics import mean_squared_error\n", + "rms = np.sqrt(mean_squared_error(y_test, predicted_values))\n", + "round(rms, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "599d3b2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.05" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q.13\n", + "from sklearn.metrics import mean_absolute_error\n", + "mean = mean_absolute_error(y_test, predicted_values)\n", + "round(mean,2)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "fa27a198", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Features Humidity_Kitchen\n", + "Linear_Model_Weight 0.553547\n", + "Name: 25, dtype: object\n", + "Features 
Humidity_LivingRoom\n", + "Linear_Model_Weight -0.456698\n", + "Name: 0, dtype: object\n" + ] + } + ], + "source": [ + "# Q.17\n", + "def get_weights_df(model, feat, column_name):\n", + " weights = pd.Series(model.coef_, feat.columns).sort_values()\n", + " weights_df = pd.DataFrame(weights).reset_index()\n", + " weights_df.columns = ['Features', column_name]\n", + " weights_df[column_name].round(3)\n", + " return weights_df\n", + "\n", + "linear_model_weights = get_weights_df(linear_model, x_train, 'Linear_Model_Weight')\n", + "\n", + "print(linear_model_weights.iloc[linear_model_weights['Linear_Model_Weight'].idxmax()])\n", + "\n", + "print(linear_model_weights.iloc[linear_model_weights['Linear_Model_Weight'].idxmin()])" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "4ba26e82", + "metadata": {}, + "outputs": [], + "source": [ + "# Lasso Regression\n", + "from sklearn.linear_model import Lasso\n", + "\n", + "lasso_R = Lasso(alpha=0.001)\n", + "lasso_R.fit(x_train, y_train)\n", + "lasso_pred = lasso_R.predict(x_test)\n", + "\n", + "lasso_weight = get_weights_df(lasso_R, x_train, 'Lasso_weight')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "68417d8f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.025\n", + " Features Lasso_weight\n", + "0 Humidity_Outside -0.049557\n", + "1 Humidity_TeenagerRoom -0.000110\n", + "2 Temp_Kitchen 0.000000\n", + "3 T_Dewpoint 0.000000\n", + "4 Visibility 0.000000\n", + "5 Press_mm_hg -0.000000\n", + "6 Temp_Outside 0.000000\n", + "7 Humidity_ParentsRoom -0.000000\n", + "8 Temp_ParentsRoom -0.000000\n", + "9 Temp_TeenagerRoom 0.000000\n", + "10 Humidity_IroningRoom -0.000000\n", + "11 Random_Var1 -0.000000\n", + "12 Temp_IroningRoom -0.000000\n", + "13 Temp_Outside_Building 0.000000\n", + "14 Humidity_Bathroom 0.000000\n", + "15 Temp_Bathroom -0.000000\n", + "16 Humidity_Office 0.000000\n", + "17 Temp_Office -0.000000\n", + "18 
Humidity_LaundryRoom 0.000000\n", + "19 Temp_LaundryRoom 0.000000\n", + "20 Humidity_LivingRoom -0.000000\n", + "21 Temp_LivingRoom 0.000000\n", + "22 Humidity_Outside_Building -0.000000\n", + "23 Random_Var2 -0.000000\n", + "24 Windspeed 0.002912\n", + "25 Humidity_Kitchen 0.017880\n" + ] + } + ], + "source": [ + "print(lasso_R.score(x_train, y_train).round(3))\n", + "print(lasso_weight)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "bcfad006", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n" + ] + } + ], + "source": [ + "# Q.19\n", + "print((lasso_weight['Lasso_weight'] != 0).sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "884998b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.094" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q20\n", + "lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso_pred))\n", + "round(lasso_rmse, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be32cb67", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Predicting Energy Efficiency of Buildings/README.md b/Predicting Energy Efficiency of Buildings/README.md new file mode 100644 index 0000000..60c4eb4 --- /dev/null +++ b/Predicting Energy Efficiency of Buildings/README.md @@ -0,0 +1,5 @@ +we will develop a multivariate multiple regression model to study the effect of eight input variables on two output variables,
which are the heating load and the cooling load, of residential buildings. +you will learn about simple linear regression and the different assumptions made by simple linear regression models. +you will learn about multiple linear regression and assumptions made by multiple linear regression models. +you will learn about different evaluation metrics for measuring regression performance. +you will learn about regularization as a method to make complex models simpler by penalising coefficients to reduce their magnitude and the variance in the training set and, in turn, reduce overfitting in the model.