diff --git a/Advanced Featuretools RUL.ipynb b/Advanced Featuretools RUL.ipynb index cb476e5..de805d4 100644 --- a/Advanced Featuretools RUL.ipynb +++ b/Advanced Featuretools RUL.ipynb @@ -345,7 +345,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:01 | Remaining: 00:00 | Progress: 100%|██████████| engine_no: 2490/2490 \n" + "Elapsed: 00:00 | Remaining: ? | Progress: 0%| | engine_no: 0/2490 " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elapsed: 00:00 | Remaining: 00:00 | Progress: 100%|██████████| engine_no: 2490/2490 \n" ] }, { @@ -373,12 +380,6 @@ " time\n", " remaining_useful_life\n", " \n", - " \n", - " id\n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -416,13 +417,12 @@ "" ], "text/plain": [ - " engine_no time remaining_useful_life\n", - "id \n", - "0 1 2000-01-01 16:40:00 220\n", - "1 1 2000-01-01 18:20:00 210\n", - "2 1 2000-01-01 20:00:00 200\n", - "3 1 2000-01-01 21:40:00 190\n", - "4 1 2000-01-01 23:20:00 180" + " engine_no time remaining_useful_life\n", + "0 1 2000-01-01 16:40:00 220\n", + "1 1 2000-01-01 18:20:00 210\n", + "2 1 2000-01-01 20:00:00 200\n", + "3 1 2000-01-01 21:40:00 190\n", + "4 1 2000-01-01 23:20:00 180" ] }, "execution_count": 3, @@ -435,7 +435,7 @@ " return len(df) - 1\n", "\n", "lm = cp.LabelMaker(\n", - " target_entity='engine_no',\n", + " target_dataframe_index='engine_no',\n", " time_index='time',\n", " labeling_function=remaining_useful_life,\n", ")\n", @@ -488,12 +488,6 @@ " time\n", " remaining_useful_life\n", " \n", - " \n", - " id\n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -532,7 +526,6 @@ ], "text/plain": [ " engine_no time remaining_useful_life\n", - "id \n", "1 1 2000-01-01 18:20:00 210\n", "9 1 2000-01-02 07:40:00 130\n", "10 2 2000-01-03 22:10:00 198\n", @@ -568,18 +561,288 @@ "cell_type": "code", "execution_count": 5, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_notime_in_cyclesoperational_setting_1operational_setting_2operational_setting_3sensor_measurement_1sensor_measurement_2sensor_measurement_3sensor_measurement_4sensor_measurement_5...sensor_measurement_14sensor_measurement_15sensor_measurement_16sensor_measurement_17sensor_measurement_18sensor_measurement_19sensor_measurement_20sensor_measurement_21indextime
index
01142.00490.8400100.0445.00549.681343.431112.933.91...8074.839.33350.023302212100.0010.626.367002000-01-01 00:00:00
11220.00200.7002100.0491.19606.071477.611237.509.35...8046.139.19130.023612324100.0024.3714.655212000-01-01 00:10:00
21342.00380.8409100.0445.00548.951343.121117.053.91...8066.629.40070.023292212100.0010.486.421322000-01-01 00:20:00
31442.00000.8400100.0445.00548.701341.241118.033.91...8076.059.33690.023282212100.0010.546.417632000-01-01 00:30:00
41525.00630.620760.0462.54536.101255.231033.597.05...7865.8010.83660.02305191584.9314.038.675442000-01-01 00:40:00
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " engine_no time_in_cycles operational_setting_1 \\\n", + "index \n", + "0 1 1 42.0049 \n", + "1 1 2 20.0020 \n", + "2 1 3 42.0038 \n", + "3 1 4 42.0000 \n", + "4 1 5 25.0063 \n", + "\n", + " operational_setting_2 operational_setting_3 sensor_measurement_1 \\\n", + "index \n", + "0 0.8400 100.0 445.00 \n", + "1 0.7002 100.0 491.19 \n", + "2 0.8409 100.0 445.00 \n", + "3 0.8400 100.0 445.00 \n", + "4 0.6207 60.0 462.54 \n", + "\n", + " sensor_measurement_2 sensor_measurement_3 sensor_measurement_4 \\\n", + "index \n", + "0 549.68 1343.43 1112.93 \n", + "1 606.07 1477.61 1237.50 \n", + "2 548.95 1343.12 1117.05 \n", + "3 548.70 1341.24 1118.03 \n", + "4 536.10 1255.23 1033.59 \n", + "\n", + " sensor_measurement_5 ... sensor_measurement_14 \\\n", + "index ... \n", + "0 3.91 ... 8074.83 \n", + "1 9.35 ... 8046.13 \n", + "2 3.91 ... 8066.62 \n", + "3 3.91 ... 8076.05 \n", + "4 7.05 ... 7865.80 \n", + "\n", + " sensor_measurement_15 sensor_measurement_16 sensor_measurement_17 \\\n", + "index \n", + "0 9.3335 0.02 330 \n", + "1 9.1913 0.02 361 \n", + "2 9.4007 0.02 329 \n", + "3 9.3369 0.02 328 \n", + "4 10.8366 0.02 305 \n", + "\n", + " sensor_measurement_18 sensor_measurement_19 sensor_measurement_20 \\\n", + "index \n", + "0 2212 100.00 10.62 \n", + "1 2324 100.00 24.37 \n", + "2 2212 100.00 10.48 \n", + "3 2212 100.00 10.54 \n", + "4 1915 84.93 14.03 \n", + "\n", + " sensor_measurement_21 index time \n", + "index \n", + "0 6.3670 0 2000-01-01 00:00:00 \n", + "1 14.6552 1 2000-01-01 00:10:00 \n", + "2 6.4213 2 2000-01-01 00:20:00 \n", + "3 6.4176 3 2000-01-01 00:30:00 \n", + "4 8.6754 4 2000-01-01 00:40:00 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { "text/plain": [ "Entityset: Dataset\n", - " Entities:\n", - " recordings [Rows: 61249, Columns: 29]\n", + " DataFrames:\n", + " rul_data [Rows: 61249, Columns: 29]\n", " engines [Rows: 249, Columns: 2]\n", " settings_clusters [Rows: 50, Columns: 2]\n", " Relationships:\n", - " recordings.engine_no -> engines.engine_no\n", - " recordings.settings_clusters -> settings_clusters.settings_clusters" + " rul_data.engine_no -> engines.engine_no\n", + " rul_data.settings_clusters -> settings_clusters.settings_clusters" ] }, "execution_count": 5, @@ -604,28 +867,27 @@ "\n", " es = ft.EntitySet('Dataset')\n", "\n", - " es.entity_from_dataframe(\n", + " es.add_dataframe(\n", " dataframe=data,\n", - " entity_id='recordings',\n", + " dataframe_name='rul_data',\n", " index='index',\n", " time_index='time',\n", " )\n", "\n", - " es.normalize_entity(\n", - " base_entity_id='recordings',\n", - " new_entity_id='engines',\n", + " es.normalize_dataframe(\n", + " base_dataframe_name ='rul_data',\n", + " new_dataframe_name ='engines',\n", " index='engine_no',\n", " )\n", "\n", - " es.normalize_entity(\n", - " base_entity_id='recordings',\n", - " new_entity_id='settings_clusters',\n", - " index='settings_clusters',\n", + " es.normalize_dataframe(\n", + " base_dataframe_name='rul_data',\n", + " new_dataframe_name='settings_clusters',\n", + " index='settings_clusters'\n", " )\n", "\n", " return es, kmeans\n", "\n", - "\n", "es, kmeans = make_entityset(data, nclusters)\n", "es" ] @@ -648,87 +910,87 @@ "\n", "\n", - "\n", "\n", - "\n", - "\n", + "\n", + "\n", "Dataset\n", - "\n", - "\n", + "\n", + "\n", "\n", - "recordings\n", - "\n", - "recordings (61249 rows)\n", - "\n", - "index : index\n", - "engine_no : id\n", - "time_in_cycles : numeric\n", - "operational_setting_1 : numeric\n", - "operational_setting_2 : numeric\n", - "operational_setting_3 : numeric\n", - "sensor_measurement_1 : numeric\n", - "sensor_measurement_2 : numeric\n", - "sensor_measurement_3 : numeric\n", - "sensor_measurement_4 : numeric\n", - "sensor_measurement_5 : numeric\n", - "sensor_measurement_6 : numeric\n", - "sensor_measurement_7 : numeric\n", - "sensor_measurement_8 : numeric\n", - "sensor_measurement_9 : numeric\n", - "sensor_measurement_10 : numeric\n", - "sensor_measurement_11 : numeric\n", - "sensor_measurement_12 : numeric\n", - "sensor_measurement_13 : numeric\n", - "sensor_measurement_14 : numeric\n", - "sensor_measurement_15 : numeric\n", - "sensor_measurement_16 : numeric\n", - "sensor_measurement_17 : numeric\n", - "sensor_measurement_18 : numeric\n", - "sensor_measurement_19 : numeric\n", - "sensor_measurement_20 : numeric\n", - "sensor_measurement_21 : numeric\n", - "time : datetime_time_index\n", - "settings_clusters : id\n", + "rul_data\n", + "\n", + "rul_data (61249 rows)\n", + "\n", + "engine_no : Integer; foreign_key\n", + "time_in_cycles : Integer\n", + "operational_setting_1 : Double\n", + "operational_setting_2 : Double\n", + "operational_setting_3 : Double\n", + "sensor_measurement_1 : Double\n", + "sensor_measurement_2 : Double\n", + "sensor_measurement_3 : Double\n", + "sensor_measurement_4 : Double\n", + "sensor_measurement_5 : Double\n", + "sensor_measurement_6 : Double\n", + "sensor_measurement_7 : Double\n", + "sensor_measurement_8 : Double\n", + "sensor_measurement_9 : Double\n", + "sensor_measurement_10 : Double\n", + "sensor_measurement_11 : Double\n", + "sensor_measurement_12 : Double\n", + "sensor_measurement_13 : Double\n", + "sensor_measurement_14 : Double\n", + "sensor_measurement_15 : Double\n", + "sensor_measurement_16 : Double\n", + "sensor_measurement_17 : Integer\n", + "sensor_measurement_18 : Integer\n", + "sensor_measurement_19 : Double\n", + "sensor_measurement_20 : Double\n", + "sensor_measurement_21 : Double\n", + "index : Integer; index\n", + "time : Datetime; time_index\n", + "settings_clusters : Integer; foreign_key\n", "\n", "\n", "\n", "engines\n", - "\n", - "engines (249 rows)\n", - "\n", - "engine_no : index\n", - "first_recordings_time : datetime_time_index\n", + "\n", + "engines (249 rows)\n", + "\n", + "engine_no : Integer; index\n", + "first_rul_data_time : Datetime; time_index\n", "\n", - "\n", + "\n", "\n", - "recordings->engines\n", - "\n", - "\n", - "engine_no\n", + "rul_data->engines\n", + "\n", + "\n", + "engine_no\n", "\n", "\n", "\n", "settings_clusters\n", - "\n", - "settings_clusters (50 rows)\n", - "\n", - "settings_clusters : index\n", - "first_recordings_time : datetime_time_index\n", + "\n", + "settings_clusters (50 rows)\n", + "\n", + "settings_clusters : Integer; index\n", + "first_rul_data_time : Datetime; time_index\n", "\n", - "\n", + "\n", "\n", - "recordings->settings_clusters\n", - "\n", - "\n", - "settings_clusters\n", + "rul_data->settings_clusters\n", + "\n", + "\n", + "settings_clusters\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -751,14 +1013,16 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Built 304 features\n", - "Elapsed: 02:31 | Progress: 100%|██████████\n" + "Built 228 features\n", + "Elapsed: 01:58 | Progress: 100%|██████████\n" ] }, { @@ -782,26 +1046,26 @@ " \n", "
\n", " \n", - " LAST(recordings.sensor_measurement_21)\n", - " LAST(recordings.sensor_measurement_4)\n", - " LAST(recordings.sensor_measurement_9)\n", - " LAST(recordings.sensor_measurement_10)\n", - " LAST(recordings.sensor_measurement_2)\n", - " LAST(recordings.sensor_measurement_11)\n", - " LAST(recordings.sensor_measurement_8)\n", - " LAST(recordings.sensor_measurement_18)\n", - " LAST(recordings.operational_setting_2)\n", - " LAST(recordings.sensor_measurement_5)\n", + " CID_CE(rul_data.operational_setting_1, normalize=False)\n", + " CID_CE(rul_data.operational_setting_2, normalize=False)\n", + " CID_CE(rul_data.operational_setting_3, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_1, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_10, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_11, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_12, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_13, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_14, normalize=False)\n", + " CID_CE(rul_data.sensor_measurement_15, normalize=False)\n", " ...\n", - " CID_CE(recordings.settings_clusters.CID_CE(recordings.sensor_measurement_14, normalize=False), normalize=False)\n", - " CID_CE(recordings.settings_clusters.LAST(recordings.sensor_measurement_20), normalize=False)\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_16), normalize=False)\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_19), normalize=False)\n", - " CID_CE(recordings.settings_clusters.LAST(recordings.operational_setting_3), normalize=False)\n", - " CID_CE(recordings.settings_clusters.LAST(recordings.sensor_measurement_21), normalize=False)\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_4), normalize=False)\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.operational_setting_1), normalize=False)\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_5), normalize=False)\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_21))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_3))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_4))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_5))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_6))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_7))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_8))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_9))\n", + " MAX(rul_data.settings_clusters.LAST(rul_data.time_in_cycles))\n", " remaining_useful_life\n", "
\n", "
\n", @@ -832,281 +1096,281 @@ " \n", "
\n", " 1\n", - " 6.3072\n", - " 1113.93\n", - " 8304.43\n", - " 1.01\n", - " 548.90\n", - " 41.80\n", - " 2211.96\n", - " 2212\n", - " 0.8400\n", - " 3.91\n", - " ...\n", - " 65.844882\n", - " 139.955323\n", - " 0.064807\n", - " 79.742945\n", + " 208.777576\n", + " 4.355841\n", " 211.660105\n", - " 84.039649\n", - " 1693.873832\n", - " 208.779151\n", - " 51.059638\n", + " 380.627959\n", + " 1.792150\n", + " 46.543688\n", + " 1939.238240\n", + " 1903.468066\n", + " 1188.077642\n", + " 10.661766\n", + " ...\n", + " 23.4175\n", + " 1578.24\n", + " 1401.95\n", + " 14.62\n", + " 21.58\n", + " 552.98\n", + " 2387.96\n", + " 9048.04\n", + " 111.0\n", " 210\n", "
\n", "
\n", " 1\n", - " 6.2907\n", - " 1113.44\n", - " 8307.98\n", - " 1.02\n", - " 548.92\n", - " 41.71\n", - " 2211.97\n", - " 2212\n", - " 0.8400\n", - " 3.91\n", - " ...\n", - " 91.824908\n", - " 180.779167\n", - " 0.086023\n", - " 106.560992\n", + " 269.413940\n", + " 5.621277\n", " 282.842712\n", - " 108.513678\n", - " 2220.595981\n", - " 269.414339\n", - " 65.551137\n", + " 487.954763\n", + " 2.392969\n", + " 61.084289\n", + " 2503.466467\n", + " 2543.782289\n", + " 1626.735462\n", + " 14.498524\n", + " ...\n", + " 23.4781\n", + " 1584.59\n", + " 1401.95\n", + " 14.62\n", + " 21.58\n", + " 553.53\n", + " 2387.99\n", + " 9054.14\n", + " 191.0\n", " 130\n", "
\n", "
\n", " 2\n", - " 8.8027\n", - " 1128.23\n", - " 8341.37\n", - " 1.02\n", - " 555.49\n", - " 41.81\n", - " 2222.92\n", - " 2223\n", - " 0.8402\n", - " 5.48\n", - " ...\n", - " 242.126348\n", - " 134.050263\n", - " 0.064807\n", - " 73.827621\n", + " 199.764962\n", + " 4.194227\n", " 195.959179\n", - " 80.411061\n", - " 1638.058020\n", - " 199.766926\n", - " 48.863007\n", + " 353.967679\n", + " 1.737872\n", + " 43.901649\n", + " 1862.523931\n", + " 1762.763195\n", + " 1164.156379\n", + " 10.303977\n", + " ...\n", + " 23.4615\n", + " 1589.22\n", + " 1406.87\n", + " 14.62\n", + " 21.61\n", + " 554.47\n", + " 2388.04\n", + " 9055.61\n", + " 101.0\n", " 198\n", "
\n", "
\n", " 2\n", - " 8.8949\n", - " 1118.05\n", - " 8346.87\n", - " 1.02\n", - " 555.80\n", - " 41.81\n", - " 2222.86\n", - " 2223\n", - " 0.8403\n", - " 5.48\n", - " ...\n", - " 302.730551\n", - " 154.102034\n", - " 0.072111\n", - " 90.420000\n", + " 230.523809\n", + " 4.792752\n", " 240.000000\n", - " 92.203085\n", - " 1897.506399\n", - " 230.525935\n", - " 56.138476\n", + " 410.530161\n", + " 2.001300\n", + " 52.038703\n", + " 2141.370941\n", + " 2158.928528\n", + " 1394.027359\n", + " 12.292916\n", + " ...\n", + " 23.3925\n", + " 1588.38\n", + " 1403.93\n", + " 14.62\n", + " 21.61\n", + " 554.26\n", + " 2388.09\n", + " 9053.81\n", + " 141.0\n", " 158\n", "
\n", "
\n", " 3\n", - " 23.5137\n", + " 271.826093\n", + " 5.854931\n", + " 293.938769\n", + " 472.270085\n", + " 2.475318\n", + " 64.716036\n", + " 2576.388154\n", + " 2643.922171\n", + " 1737.037013\n", + " 15.251866\n", + " ...\n", + " 23.5239\n", + " 1590.91\n", " 1402.21\n", - " 9072.08\n", - " 1.30\n", - " 642.13\n", - " 47.27\n", - " 2388.12\n", - " 2388\n", - " 0.0000\n", " 14.62\n", - " ...\n", - " 326.066224\n", - " 184.877804\n", - " 0.089443\n", - " 110.741431\n", - " 293.938769\n", - " 111.023411\n", - " 2315.993077\n", - " 271.827606\n", - " 66.096208\n", + " 21.60\n", + " 557.81\n", + " 2388.12\n", + " 9072.08\n", + " 171.0\n", " 136\n", "
\n", "
\n", "\n", - "

5 rows × 305 columns

\n", + "

5 rows × 229 columns

\n", "" ], "text/plain": [ - " LAST(recordings.sensor_measurement_21) \\\n", - "engine_no \n", - "1 6.3072 \n", - "1 6.2907 \n", - "2 8.8027 \n", - "2 8.8949 \n", - "3 23.5137 \n", + " CID_CE(rul_data.operational_setting_1, normalize=False) \\\n", + "engine_no \n", + "1 208.777576 \n", + "1 269.413940 \n", + "2 199.764962 \n", + "2 230.523809 \n", + "3 271.826093 \n", "\n", - " LAST(recordings.sensor_measurement_4) \\\n", - "engine_no \n", - "1 1113.93 \n", - "1 1113.44 \n", - "2 1128.23 \n", - "2 1118.05 \n", - "3 1402.21 \n", + " CID_CE(rul_data.operational_setting_2, normalize=False) \\\n", + "engine_no \n", + "1 4.355841 \n", + "1 5.621277 \n", + "2 4.194227 \n", + "2 4.792752 \n", + "3 5.854931 \n", "\n", - " LAST(recordings.sensor_measurement_9) \\\n", - "engine_no \n", - "1 8304.43 \n", - "1 8307.98 \n", - "2 8341.37 \n", - "2 8346.87 \n", - "3 9072.08 \n", + " CID_CE(rul_data.operational_setting_3, normalize=False) \\\n", + "engine_no \n", + "1 211.660105 \n", + "1 282.842712 \n", + "2 195.959179 \n", + "2 240.000000 \n", + "3 293.938769 \n", "\n", - " LAST(recordings.sensor_measurement_10) \\\n", - "engine_no \n", - "1 1.01 \n", - "1 1.02 \n", - "2 1.02 \n", - "2 1.02 \n", - "3 1.30 \n", + " CID_CE(rul_data.sensor_measurement_1, normalize=False) \\\n", + "engine_no \n", + "1 380.627959 \n", + "1 487.954763 \n", + "2 353.967679 \n", + "2 410.530161 \n", + "3 472.270085 \n", "\n", - " LAST(recordings.sensor_measurement_2) \\\n", - "engine_no \n", - "1 548.90 \n", - "1 548.92 \n", - "2 555.49 \n", - "2 555.80 \n", - "3 642.13 \n", + " CID_CE(rul_data.sensor_measurement_10, normalize=False) \\\n", + "engine_no \n", + "1 1.792150 \n", + "1 2.392969 \n", + "2 1.737872 \n", + "2 2.001300 \n", + "3 2.475318 \n", "\n", - " LAST(recordings.sensor_measurement_11) \\\n", - "engine_no \n", - "1 41.80 \n", - "1 41.71 \n", - "2 41.81 \n", - "2 41.81 \n", - "3 47.27 \n", + " CID_CE(rul_data.sensor_measurement_11, normalize=False) \\\n", + "engine_no \n", + "1 46.543688 \n", + "1 61.084289 \n", + "2 43.901649 \n", + "2 52.038703 \n", + "3 64.716036 \n", "\n", - " LAST(recordings.sensor_measurement_8) \\\n", - "engine_no \n", - "1 2211.96 \n", - "1 2211.97 \n", - "2 2222.92 \n", - "2 2222.86 \n", - "3 2388.12 \n", + " CID_CE(rul_data.sensor_measurement_12, normalize=False) \\\n", + "engine_no \n", + "1 1939.238240 \n", + "1 2503.466467 \n", + "2 1862.523931 \n", + "2 2141.370941 \n", + "3 2576.388154 \n", "\n", - " LAST(recordings.sensor_measurement_18) \\\n", - "engine_no \n", - "1 2212 \n", - "1 2212 \n", - "2 2223 \n", - "2 2223 \n", - "3 2388 \n", + " CID_CE(rul_data.sensor_measurement_13, normalize=False) \\\n", + "engine_no \n", + "1 1903.468066 \n", + "1 2543.782289 \n", + "2 1762.763195 \n", + "2 2158.928528 \n", + "3 2643.922171 \n", "\n", - " LAST(recordings.operational_setting_2) \\\n", - "engine_no \n", - "1 0.8400 \n", - "1 0.8400 \n", - "2 0.8402 \n", - "2 0.8403 \n", - "3 0.0000 \n", + " CID_CE(rul_data.sensor_measurement_14, normalize=False) \\\n", + "engine_no \n", + "1 1188.077642 \n", + "1 1626.735462 \n", + "2 1164.156379 \n", + "2 1394.027359 \n", + "3 1737.037013 \n", "\n", - " LAST(recordings.sensor_measurement_5) ... \\\n", - "engine_no ... \n", - "1 3.91 ... \n", - "1 3.91 ... \n", - "2 5.48 ... \n", - "2 5.48 ... \n", - "3 14.62 ... \n", + " CID_CE(rul_data.sensor_measurement_15, normalize=False) ... \\\n", + "engine_no ... \n", + "1 10.661766 ... \n", + "1 14.498524 ... \n", + "2 10.303977 ... \n", + "2 12.292916 ... \n", + "3 15.251866 ... \n", "\n", - " CID_CE(recordings.settings_clusters.CID_CE(recordings.sensor_measurement_14, normalize=False), normalize=False) \\\n", - "engine_no \n", - "1 65.844882 \n", - "1 91.824908 \n", - "2 242.126348 \n", - "2 302.730551 \n", - "3 326.066224 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_21)) \\\n", + "engine_no \n", + "1 23.4175 \n", + "1 23.4781 \n", + "2 23.4615 \n", + "2 23.3925 \n", + "3 23.5239 \n", "\n", - " CID_CE(recordings.settings_clusters.LAST(recordings.sensor_measurement_20), normalize=False) \\\n", - "engine_no \n", - "1 139.955323 \n", - "1 180.779167 \n", - "2 134.050263 \n", - "2 154.102034 \n", - "3 184.877804 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_3)) \\\n", + "engine_no \n", + "1 1578.24 \n", + "1 1584.59 \n", + "2 1589.22 \n", + "2 1588.38 \n", + "3 1590.91 \n", "\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_16), normalize=False) \\\n", - "engine_no \n", - "1 0.064807 \n", - "1 0.086023 \n", - "2 0.064807 \n", - "2 0.072111 \n", - "3 0.089443 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_4)) \\\n", + "engine_no \n", + "1 1401.95 \n", + "1 1401.95 \n", + "2 1406.87 \n", + "2 1403.93 \n", + "3 1402.21 \n", "\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_19), normalize=False) \\\n", - "engine_no \n", - "1 79.742945 \n", - "1 106.560992 \n", - "2 73.827621 \n", - "2 90.420000 \n", - "3 110.741431 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_5)) \\\n", + "engine_no \n", + "1 14.62 \n", + "1 14.62 \n", + "2 14.62 \n", + "2 14.62 \n", + "3 14.62 \n", "\n", - " CID_CE(recordings.settings_clusters.LAST(recordings.operational_setting_3), normalize=False) \\\n", - "engine_no \n", - "1 211.660105 \n", - "1 282.842712 \n", - "2 195.959179 \n", - "2 240.000000 \n", - "3 293.938769 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_6)) \\\n", + "engine_no \n", + "1 21.58 \n", + "1 21.58 \n", + "2 21.61 \n", + "2 21.61 \n", + "3 21.60 \n", "\n", - " CID_CE(recordings.settings_clusters.LAST(recordings.sensor_measurement_21), normalize=False) \\\n", - "engine_no \n", - "1 84.039649 \n", - "1 108.513678 \n", - "2 80.411061 \n", - "2 92.203085 \n", - "3 111.023411 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_7)) \\\n", + "engine_no \n", + "1 552.98 \n", + "1 553.53 \n", + "2 554.47 \n", + "2 554.26 \n", + "3 557.81 \n", "\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_4), normalize=False) \\\n", - "engine_no \n", - "1 1693.873832 \n", - "1 2220.595981 \n", - "2 1638.058020 \n", - "2 1897.506399 \n", - "3 2315.993077 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_8)) \\\n", + "engine_no \n", + "1 2387.96 \n", + "1 2387.99 \n", + "2 2388.04 \n", + "2 2388.09 \n", + "3 2388.12 \n", "\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.operational_setting_1), normalize=False) \\\n", - "engine_no \n", - "1 208.779151 \n", - "1 269.414339 \n", - "2 199.766926 \n", - "2 230.525935 \n", - "3 271.827606 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.sensor_measurement_9)) \\\n", + "engine_no \n", + "1 9048.04 \n", + "1 9054.14 \n", + "2 9055.61 \n", + "2 9053.81 \n", + "3 9072.08 \n", "\n", - " CID_CE(recordings.settings_clusters.MAX(recordings.sensor_measurement_5), normalize=False) \\\n", - "engine_no \n", - "1 51.059638 \n", - "1 65.551137 \n", - "2 48.863007 \n", - "2 56.138476 \n", - "3 66.096208 \n", + " MAX(rul_data.settings_clusters.LAST(rul_data.time_in_cycles)) \\\n", + "engine_no \n", + "1 111.0 \n", + "1 191.0 \n", + "2 101.0 \n", + "2 141.0 \n", + "3 171.0 \n", "\n", " remaining_useful_life \n", "engine_no \n", @@ -1116,7 +1380,7 @@ "2 158 \n", "3 136 \n", "\n", - "[5 rows x 305 columns]" + "[5 rows x 229 columns]" ] }, "execution_count": 7, @@ -1129,7 +1393,7 @@ "\n", "fm, features = ft.dfs(\n", " entityset=es,\n", - " target_entity='engines',\n", + " target_dataframe_name='engines',\n", " agg_primitives=['last', 'max', CidCe(normalize=False)],\n", " trans_primitives=[],\n", " chunk_size=.26,\n", @@ -1151,14 +1415,22 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 4/4 [08:39<00:00, 129.77s/it]\n" + " 0%| | 0/4 [00:00time\n", " remaining_useful_life\n", " \n", - " \n", - " id\n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -457,13 +458,12 @@ "" ], "text/plain": [ - " engine_no time remaining_useful_life\n", - "id \n", - "0 1 2000-01-01 16:40:00 220\n", - "1 2 2000-01-03 22:10:00 198\n", - "2 3 2000-01-06 00:00:00 206\n", - "3 4 2000-01-08 03:10:00 173\n", - "4 5 2000-01-10 00:50:00 92" + " engine_no time remaining_useful_life\n", + "0 1 2000-01-01 16:40:00 220\n", + "1 2 2000-01-03 22:10:00 198\n", + "2 3 2000-01-06 00:00:00 206\n", + "3 4 2000-01-08 03:10:00 173\n", + "4 5 2000-01-10 00:50:00 92" ] }, "execution_count": 5, @@ -502,22 +502,22 @@ "def make_entityset(data):\n", " es = ft.EntitySet('Dataset')\n", "\n", - " es.entity_from_dataframe(\n", + " es.add_dataframe(\n", " dataframe=data,\n", - " entity_id='recordings',\n", + " dataframe_name='recordings',\n", " index='index',\n", " time_index='time',\n", " )\n", "\n", - " es.normalize_entity(\n", - " base_entity_id='recordings',\n", - " new_entity_id='engines',\n", + " es.normalize_dataframe(\n", + " base_dataframe_name='recordings',\n", + " new_dataframe_name='engines',\n", " index='engine_no',\n", " )\n", "\n", - " es.normalize_entity(\n", - " base_entity_id='recordings',\n", - " new_entity_id='cycles',\n", + " es.normalize_dataframe(\n", + " base_dataframe_name='recordings',\n", + " new_dataframe_name='cycles',\n", " index='time_in_cycles',\n", " )\n", "\n", @@ -533,7 +533,7 @@ "data": { "text/plain": [ "Entityset: Dataset\n", - " Entities:\n", + " DataFrames:\n", " recordings [Rows: 61249, Columns: 28]\n", " engines [Rows: 249, Columns: 2]\n", " cycles [Rows: 543, Columns: 2]\n", @@ -570,86 +570,86 @@ "\n", "\n", - "\n", "\n", - "\n", - "\n", + "\n", + "\n", "Dataset\n", - "\n", + "\n", "\n", "\n", "recordings\n", - "\n", - "recordings (61249 rows)\n", - "\n", - "index : index\n", - "engine_no : id\n", - "time_in_cycles : id\n", - "operational_setting_1 : numeric\n", - "operational_setting_2 : numeric\n", - "operational_setting_3 : numeric\n", - "sensor_measurement_1 : numeric\n", - "sensor_measurement_2 : numeric\n", - "sensor_measurement_3 : numeric\n", - "sensor_measurement_4 : numeric\n", - "sensor_measurement_5 : numeric\n", - "sensor_measurement_6 : numeric\n", - "sensor_measurement_7 : numeric\n", - "sensor_measurement_8 : numeric\n", - "sensor_measurement_9 : numeric\n", - "sensor_measurement_10 : numeric\n", - "sensor_measurement_11 : numeric\n", - "sensor_measurement_12 : numeric\n", - "sensor_measurement_13 : numeric\n", - "sensor_measurement_14 : numeric\n", - "sensor_measurement_15 : numeric\n", - "sensor_measurement_16 : numeric\n", - "sensor_measurement_17 : numeric\n", - "sensor_measurement_18 : numeric\n", - "sensor_measurement_19 : numeric\n", - "sensor_measurement_20 : numeric\n", - "sensor_measurement_21 : numeric\n", - "time : datetime_time_index\n", + "\n", + "recordings (61249 rows)\n", + "\n", + "engine_no : Integer; foreign_key\n", + "time_in_cycles : Integer; foreign_key\n", + "operational_setting_1 : Double\n", + "operational_setting_2 : Double\n", + "operational_setting_3 : Double\n", + "sensor_measurement_1 : Double\n", + "sensor_measurement_2 : Double\n", + "sensor_measurement_3 : Double\n", + "sensor_measurement_4 : Double\n", + "sensor_measurement_5 : Double\n", + "sensor_measurement_6 : Double\n", + "sensor_measurement_7 : Double\n", + "sensor_measurement_8 : Double\n", + "sensor_measurement_9 : Double\n", + "sensor_measurement_10 : Double\n", + "sensor_measurement_11 : Double\n", + "sensor_measurement_12 : Double\n", + "sensor_measurement_13 : Double\n", + "sensor_measurement_14 : Double\n", + "sensor_measurement_15 : Double\n", + "sensor_measurement_16 : Double\n", + "sensor_measurement_17 : Integer\n", + "sensor_measurement_18 : Integer\n", + "sensor_measurement_19 : Double\n", + "sensor_measurement_20 : Double\n", + "sensor_measurement_21 : Double\n", + "index : Integer; index\n", + "time : Datetime; time_index\n", "\n", "\n", "\n", "engines\n", - "\n", - "engines (249 rows)\n", - "\n", - "engine_no : index\n", - "first_recordings_time : datetime_time_index\n", + "\n", + "engines (249 rows)\n", + "\n", + "engine_no : Integer; index\n", + "first_recordings_time : Datetime; time_index\n", "\n", "\n", "\n", "recordings->engines\n", - "\n", - "\n", - "engine_no\n", + "\n", + "\n", + "engine_no\n", "\n", "\n", "\n", "cycles\n", - "\n", - "cycles (543 rows)\n", - "\n", - "time_in_cycles : index\n", - "first_recordings_time : datetime_time_index\n", + "\n", + "cycles (543 rows)\n", + "\n", + "time_in_cycles : Integer; index\n", + "first_recordings_time : Datetime; time_index\n", "\n", "\n", "\n", "recordings->cycles\n", - "\n", - "\n", - "time_in_cycles\n", + "\n", + "\n", + "time_in_cycles\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -680,15 +680,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Built 292 features\n", - "Elapsed: 02:05 | Progress: 100%|██████████\n" + "Built 219 features\n", + "Elapsed: 01:23 | Progress: 100%|██████████\n" ] } ], "source": [ "fm, features = ft.dfs(\n", " entityset=es,\n", - " target_entity='engines',\n", + " target_dataframe_name='engines',\n", " agg_primitives=['last', 'max', 'min'],\n", " trans_primitives=[],\n", " cutoff_time=label_times,\n", @@ -1305,7 +1305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.11.3" } }, "nbformat": 4,