diff --git a/Advanced Featuretools RUL.ipynb b/Advanced Featuretools RUL.ipynb
index cb476e5..de805d4 100644
--- a/Advanced Featuretools RUL.ipynb
+++ b/Advanced Featuretools RUL.ipynb
@@ -345,7 +345,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Elapsed: 00:01 | Remaining: 00:00 | Progress: 100%|██████████| engine_no: 2490/2490 \n"
+ "Elapsed: 00:00 | Remaining: ? | Progress: 0%| | engine_no: 0/2490 "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Elapsed: 00:00 | Remaining: 00:00 | Progress: 100%|██████████| engine_no: 2490/2490 \n"
]
},
{
@@ -373,12 +380,6 @@
"
time | \n",
" remaining_useful_life | \n",
" \n",
- " \n",
- " id | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
" \n",
" \n",
" \n",
@@ -416,13 +417,12 @@
""
],
"text/plain": [
- " engine_no time remaining_useful_life\n",
- "id \n",
- "0 1 2000-01-01 16:40:00 220\n",
- "1 1 2000-01-01 18:20:00 210\n",
- "2 1 2000-01-01 20:00:00 200\n",
- "3 1 2000-01-01 21:40:00 190\n",
- "4 1 2000-01-01 23:20:00 180"
+ " engine_no time remaining_useful_life\n",
+ "0 1 2000-01-01 16:40:00 220\n",
+ "1 1 2000-01-01 18:20:00 210\n",
+ "2 1 2000-01-01 20:00:00 200\n",
+ "3 1 2000-01-01 21:40:00 190\n",
+ "4 1 2000-01-01 23:20:00 180"
]
},
"execution_count": 3,
@@ -435,7 +435,7 @@
" return len(df) - 1\n",
"\n",
"lm = cp.LabelMaker(\n",
- " target_entity='engine_no',\n",
+ " target_dataframe_index='engine_no',\n",
" time_index='time',\n",
" labeling_function=remaining_useful_life,\n",
")\n",
@@ -488,12 +488,6 @@
" time | \n",
" remaining_useful_life | \n",
"
\n",
- " \n",
- " id | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
" \n",
" \n",
" \n",
@@ -532,7 +526,6 @@
],
"text/plain": [
" engine_no time remaining_useful_life\n",
- "id \n",
"1 1 2000-01-01 18:20:00 210\n",
"9 1 2000-01-02 07:40:00 130\n",
"10 2 2000-01-03 22:10:00 198\n",
@@ -568,18 +561,288 @@
"cell_type": "code",
"execution_count": 5,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " engine_no | \n",
+ " time_in_cycles | \n",
+ " operational_setting_1 | \n",
+ " operational_setting_2 | \n",
+ " operational_setting_3 | \n",
+ " sensor_measurement_1 | \n",
+ " sensor_measurement_2 | \n",
+ " sensor_measurement_3 | \n",
+ " sensor_measurement_4 | \n",
+ " sensor_measurement_5 | \n",
+ " ... | \n",
+ " sensor_measurement_14 | \n",
+ " sensor_measurement_15 | \n",
+ " sensor_measurement_16 | \n",
+ " sensor_measurement_17 | \n",
+ " sensor_measurement_18 | \n",
+ " sensor_measurement_19 | \n",
+ " sensor_measurement_20 | \n",
+ " sensor_measurement_21 | \n",
+ " index | \n",
+ " time | \n",
+ "
\n",
+ " \n",
+ " index | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 42.0049 | \n",
+ " 0.8400 | \n",
+ " 100.0 | \n",
+ " 445.00 | \n",
+ " 549.68 | \n",
+ " 1343.43 | \n",
+ " 1112.93 | \n",
+ " 3.91 | \n",
+ " ... | \n",
+ " 8074.83 | \n",
+ " 9.3335 | \n",
+ " 0.02 | \n",
+ " 330 | \n",
+ " 2212 | \n",
+ " 100.00 | \n",
+ " 10.62 | \n",
+ " 6.3670 | \n",
+ " 0 | \n",
+ " 2000-01-01 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20.0020 | \n",
+ " 0.7002 | \n",
+ " 100.0 | \n",
+ " 491.19 | \n",
+ " 606.07 | \n",
+ " 1477.61 | \n",
+ " 1237.50 | \n",
+ " 9.35 | \n",
+ " ... | \n",
+ " 8046.13 | \n",
+ " 9.1913 | \n",
+ " 0.02 | \n",
+ " 361 | \n",
+ " 2324 | \n",
+ " 100.00 | \n",
+ " 24.37 | \n",
+ " 14.6552 | \n",
+ " 1 | \n",
+ " 2000-01-01 00:10:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 42.0038 | \n",
+ " 0.8409 | \n",
+ " 100.0 | \n",
+ " 445.00 | \n",
+ " 548.95 | \n",
+ " 1343.12 | \n",
+ " 1117.05 | \n",
+ " 3.91 | \n",
+ " ... | \n",
+ " 8066.62 | \n",
+ " 9.4007 | \n",
+ " 0.02 | \n",
+ " 329 | \n",
+ " 2212 | \n",
+ " 100.00 | \n",
+ " 10.48 | \n",
+ " 6.4213 | \n",
+ " 2 | \n",
+ " 2000-01-01 00:20:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 42.0000 | \n",
+ " 0.8400 | \n",
+ " 100.0 | \n",
+ " 445.00 | \n",
+ " 548.70 | \n",
+ " 1341.24 | \n",
+ " 1118.03 | \n",
+ " 3.91 | \n",
+ " ... | \n",
+ " 8076.05 | \n",
+ " 9.3369 | \n",
+ " 0.02 | \n",
+ " 328 | \n",
+ " 2212 | \n",
+ " 100.00 | \n",
+ " 10.54 | \n",
+ " 6.4176 | \n",
+ " 3 | \n",
+ " 2000-01-01 00:30:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 25.0063 | \n",
+ " 0.6207 | \n",
+ " 60.0 | \n",
+ " 462.54 | \n",
+ " 536.10 | \n",
+ " 1255.23 | \n",
+ " 1033.59 | \n",
+ " 7.05 | \n",
+ " ... | \n",
+ " 7865.80 | \n",
+ " 10.8366 | \n",
+ " 0.02 | \n",
+ " 305 | \n",
+ " 1915 | \n",
+ " 84.93 | \n",
+ " 14.03 | \n",
+ " 8.6754 | \n",
+ " 4 | \n",
+ " 2000-01-01 00:40:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " engine_no time_in_cycles operational_setting_1 \\\n",
+ "index \n",
+ "0 1 1 42.0049 \n",
+ "1 1 2 20.0020 \n",
+ "2 1 3 42.0038 \n",
+ "3 1 4 42.0000 \n",
+ "4 1 5 25.0063 \n",
+ "\n",
+ " operational_setting_2 operational_setting_3 sensor_measurement_1 \\\n",
+ "index \n",
+ "0 0.8400 100.0 445.00 \n",
+ "1 0.7002 100.0 491.19 \n",
+ "2 0.8409 100.0 445.00 \n",
+ "3 0.8400 100.0 445.00 \n",
+ "4 0.6207 60.0 462.54 \n",
+ "\n",
+ " sensor_measurement_2 sensor_measurement_3 sensor_measurement_4 \\\n",
+ "index \n",
+ "0 549.68 1343.43 1112.93 \n",
+ "1 606.07 1477.61 1237.50 \n",
+ "2 548.95 1343.12 1117.05 \n",
+ "3 548.70 1341.24 1118.03 \n",
+ "4 536.10 1255.23 1033.59 \n",
+ "\n",
+ " sensor_measurement_5 ... sensor_measurement_14 \\\n",
+ "index ... \n",
+ "0 3.91 ... 8074.83 \n",
+ "1 9.35 ... 8046.13 \n",
+ "2 3.91 ... 8066.62 \n",
+ "3 3.91 ... 8076.05 \n",
+ "4 7.05 ... 7865.80 \n",
+ "\n",
+ " sensor_measurement_15 sensor_measurement_16 sensor_measurement_17 \\\n",
+ "index \n",
+ "0 9.3335 0.02 330 \n",
+ "1 9.1913 0.02 361 \n",
+ "2 9.4007 0.02 329 \n",
+ "3 9.3369 0.02 328 \n",
+ "4 10.8366 0.02 305 \n",
+ "\n",
+ " sensor_measurement_18 sensor_measurement_19 sensor_measurement_20 \\\n",
+ "index \n",
+ "0 2212 100.00 10.62 \n",
+ "1 2324 100.00 24.37 \n",
+ "2 2212 100.00 10.48 \n",
+ "3 2212 100.00 10.54 \n",
+ "4 1915 84.93 14.03 \n",
+ "\n",
+ " sensor_measurement_21 index time \n",
+ "index \n",
+ "0 6.3670 0 2000-01-01 00:00:00 \n",
+ "1 14.6552 1 2000-01-01 00:10:00 \n",
+ "2 6.4213 2 2000-01-01 00:20:00 \n",
+ "3 6.4176 3 2000-01-01 00:30:00 \n",
+ "4 8.6754 4 2000-01-01 00:40:00 \n",
+ "\n",
+ "[5 rows x 28 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"data": {
"text/plain": [
"Entityset: Dataset\n",
- " Entities:\n",
- " recordings [Rows: 61249, Columns: 29]\n",
+ " DataFrames:\n",
+ " rul_data [Rows: 61249, Columns: 29]\n",
" engines [Rows: 249, Columns: 2]\n",
" settings_clusters [Rows: 50, Columns: 2]\n",
" Relationships:\n",
- " recordings.engine_no -> engines.engine_no\n",
- " recordings.settings_clusters -> settings_clusters.settings_clusters"
+ " rul_data.engine_no -> engines.engine_no\n",
+ " rul_data.settings_clusters -> settings_clusters.settings_clusters"
]
},
"execution_count": 5,
@@ -604,28 +867,27 @@
"\n",
" es = ft.EntitySet('Dataset')\n",
"\n",
- " es.entity_from_dataframe(\n",
+ " es.add_dataframe(\n",
" dataframe=data,\n",
- " entity_id='recordings',\n",
+ " dataframe_name='rul_data',\n",
" index='index',\n",
" time_index='time',\n",
" )\n",
"\n",
- " es.normalize_entity(\n",
- " base_entity_id='recordings',\n",
- " new_entity_id='engines',\n",
+ " es.normalize_dataframe(\n",
+ " base_dataframe_name='rul_data',\n",
+ " new_dataframe_name='engines',\n",
" index='engine_no',\n",
" )\n",
"\n",
- " es.normalize_entity(\n",
- " base_entity_id='recordings',\n",
- " new_entity_id='settings_clusters',\n",
- " index='settings_clusters',\n",
+ " es.normalize_dataframe(\n",
+ " base_dataframe_name='rul_data',\n",
+ " new_dataframe_name='settings_clusters',\n",
+ " index='settings_clusters',\n",
" )\n",
"\n",
" return es, kmeans\n",
"\n",
- "\n",
"es, kmeans = make_entityset(data, nclusters)\n",
"es"
]
@@ -648,87 +910,87 @@
"\n",
"\n",
- "\n",
"\n",
- "
\n",
- " \n",
- " id | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
" \n",
" \n",
" \n",
@@ -457,13 +458,12 @@
""
],
"text/plain": [
- " engine_no time remaining_useful_life\n",
- "id \n",
- "0 1 2000-01-01 16:40:00 220\n",
- "1 2 2000-01-03 22:10:00 198\n",
- "2 3 2000-01-06 00:00:00 206\n",
- "3 4 2000-01-08 03:10:00 173\n",
- "4 5 2000-01-10 00:50:00 92"
+ " engine_no time remaining_useful_life\n",
+ "0 1 2000-01-01 16:40:00 220\n",
+ "1 2 2000-01-03 22:10:00 198\n",
+ "2 3 2000-01-06 00:00:00 206\n",
+ "3 4 2000-01-08 03:10:00 173\n",
+ "4 5 2000-01-10 00:50:00 92"
]
},
"execution_count": 5,
@@ -502,22 +502,22 @@
"def make_entityset(data):\n",
" es = ft.EntitySet('Dataset')\n",
"\n",
- " es.entity_from_dataframe(\n",
+ " es.add_dataframe(\n",
" dataframe=data,\n",
- " entity_id='recordings',\n",
+ " dataframe_name='recordings',\n",
" index='index',\n",
" time_index='time',\n",
" )\n",
"\n",
- " es.normalize_entity(\n",
- " base_entity_id='recordings',\n",
- " new_entity_id='engines',\n",
+ " es.normalize_dataframe(\n",
+ " base_dataframe_name='recordings',\n",
+ " new_dataframe_name='engines',\n",
" index='engine_no',\n",
" )\n",
"\n",
- " es.normalize_entity(\n",
- " base_entity_id='recordings',\n",
- " new_entity_id='cycles',\n",
+ " es.normalize_dataframe(\n",
+ " base_dataframe_name='recordings',\n",
+ " new_dataframe_name='cycles',\n",
" index='time_in_cycles',\n",
" )\n",
"\n",
@@ -533,7 +533,7 @@
"data": {
"text/plain": [
"Entityset: Dataset\n",
- " Entities:\n",
+ " DataFrames:\n",
" recordings [Rows: 61249, Columns: 28]\n",
" engines [Rows: 249, Columns: 2]\n",
" cycles [Rows: 543, Columns: 2]\n",
@@ -570,86 +570,86 @@
"\n",
"\n",
- "\n",
"\n",
- "\n",
- "\n",
+ "\n",
+ "\n",
"Dataset\n",
- "\n",
+ "\n",
"\n",
"\n",
"recordings\n",
- "\n",
- "recordings (61249 rows)\n",
- "\n",
- "index : index\n",
- "engine_no : id\n",
- "time_in_cycles : id\n",
- "operational_setting_1 : numeric\n",
- "operational_setting_2 : numeric\n",
- "operational_setting_3 : numeric\n",
- "sensor_measurement_1 : numeric\n",
- "sensor_measurement_2 : numeric\n",
- "sensor_measurement_3 : numeric\n",
- "sensor_measurement_4 : numeric\n",
- "sensor_measurement_5 : numeric\n",
- "sensor_measurement_6 : numeric\n",
- "sensor_measurement_7 : numeric\n",
- "sensor_measurement_8 : numeric\n",
- "sensor_measurement_9 : numeric\n",
- "sensor_measurement_10 : numeric\n",
- "sensor_measurement_11 : numeric\n",
- "sensor_measurement_12 : numeric\n",
- "sensor_measurement_13 : numeric\n",
- "sensor_measurement_14 : numeric\n",
- "sensor_measurement_15 : numeric\n",
- "sensor_measurement_16 : numeric\n",
- "sensor_measurement_17 : numeric\n",
- "sensor_measurement_18 : numeric\n",
- "sensor_measurement_19 : numeric\n",
- "sensor_measurement_20 : numeric\n",
- "sensor_measurement_21 : numeric\n",
- "time : datetime_time_index\n",
+ "\n",
+ "recordings (61249 rows)\n",
+ "\n",
+ "engine_no : Integer; foreign_key\n",
+ "time_in_cycles : Integer; foreign_key\n",
+ "operational_setting_1 : Double\n",
+ "operational_setting_2 : Double\n",
+ "operational_setting_3 : Double\n",
+ "sensor_measurement_1 : Double\n",
+ "sensor_measurement_2 : Double\n",
+ "sensor_measurement_3 : Double\n",
+ "sensor_measurement_4 : Double\n",
+ "sensor_measurement_5 : Double\n",
+ "sensor_measurement_6 : Double\n",
+ "sensor_measurement_7 : Double\n",
+ "sensor_measurement_8 : Double\n",
+ "sensor_measurement_9 : Double\n",
+ "sensor_measurement_10 : Double\n",
+ "sensor_measurement_11 : Double\n",
+ "sensor_measurement_12 : Double\n",
+ "sensor_measurement_13 : Double\n",
+ "sensor_measurement_14 : Double\n",
+ "sensor_measurement_15 : Double\n",
+ "sensor_measurement_16 : Double\n",
+ "sensor_measurement_17 : Integer\n",
+ "sensor_measurement_18 : Integer\n",
+ "sensor_measurement_19 : Double\n",
+ "sensor_measurement_20 : Double\n",
+ "sensor_measurement_21 : Double\n",
+ "index : Integer; index\n",
+ "time : Datetime; time_index\n",
"\n",
"\n",
"\n",
"engines\n",
- "\n",
- "engines (249 rows)\n",
- "\n",
- "engine_no : index\n",
- "first_recordings_time : datetime_time_index\n",
+ "\n",
+ "engines (249 rows)\n",
+ "\n",
+ "engine_no : Integer; index\n",
+ "first_recordings_time : Datetime; time_index\n",
"\n",
"\n",
"\n",
"recordings->engines\n",
- "\n",
- "\n",
- "engine_no\n",
+ "\n",
+ "\n",
+ "engine_no\n",
"\n",
"\n",
"\n",
"cycles\n",
- "\n",
- "cycles (543 rows)\n",
- "\n",
- "time_in_cycles : index\n",
- "first_recordings_time : datetime_time_index\n",
+ "\n",
+ "cycles (543 rows)\n",
+ "\n",
+ "time_in_cycles : Integer; index\n",
+ "first_recordings_time : Datetime; time_index\n",
"\n",
"\n",
"\n",
"recordings->cycles\n",
- "\n",
- "\n",
- "time_in_cycles\n",
+ "\n",
+ "\n",
+ "time_in_cycles\n",
"\n",
"\n",
"\n"
],
"text/plain": [
- ""
+ ""
]
},
"execution_count": 8,
@@ -680,15 +680,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Built 292 features\n",
- "Elapsed: 02:05 | Progress: 100%|██████████\n"
+ "Built 219 features\n",
+ "Elapsed: 01:23 | Progress: 100%|██████████\n"
]
}
],
"source": [
"fm, features = ft.dfs(\n",
" entityset=es,\n",
- " target_entity='engines',\n",
+ " target_dataframe_name='engines',\n",
" agg_primitives=['last', 'max', 'min'],\n",
" trans_primitives=[],\n",
" cutoff_time=label_times,\n",
@@ -1305,7 +1305,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.4"
+ "version": "3.11.3"
}
},
"nbformat": 4,