diff --git a/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb b/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb
index e9a0716900..6fad81d70c 100644
--- a/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb
+++ b/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb
@@ -54,18 +54,9 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 1,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: total: 0 ns\n",
- "Wall time: 1.01 ms\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
@@ -79,6 +70,20 @@
"from arcgis.learn import MLModel, prepare_tabulardata"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The TabPFN package can be installed by running the following command in the Python Command Prompt:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`conda install -c esri tabpfn`"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -88,7 +93,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -106,7 +111,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -123,8 +128,8 @@
" train_har_dataset\n",
" \n",
"
HAR dataset
CSV by api_data_owner\n",
- "
Last Modified: January 10, 2025\n",
- "
0 comments, 3 views\n",
+ "
Last Modified: January 11, 2025\n",
+ "
0 comments, 48 views\n",
" \n",
" \n",
" "
@@ -133,7 +138,7 @@
"- "
]
},
- "execution_count": 23,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -146,7 +151,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -156,7 +161,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -382,7 +387,7 @@
"[5 rows x 563 columns]"
]
},
- "execution_count": 25,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -395,7 +400,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -404,7 +409,7 @@
"(1020, 563)"
]
},
- "execution_count": 26,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -422,7 +427,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -439,8 +444,8 @@
" test_har_dataset\n",
" \n",
"
HAR dataset
CSV by api_data_owner\n",
- "
Last Modified: January 10, 2025\n",
- "
0 comments, 0 views\n",
+ "
Last Modified: January 11, 2025\n",
+ "
0 comments, 55 views\n",
" \n",
" \n",
" "
@@ -449,7 +454,7 @@
"- "
]
},
- "execution_count": 27,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -462,7 +467,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -472,7 +477,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -698,7 +703,7 @@
"[5 rows x 563 columns]"
]
},
- "execution_count": 29,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -711,7 +716,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -720,7 +725,7 @@
"(6332, 563)"
]
},
- "execution_count": 30,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -738,7 +743,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -749,7 +754,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 13,
"metadata": {
"scrolled": true
},
@@ -1320,7 +1325,7 @@
" 'angle(Z,gravityMean)']"
]
},
- "execution_count": 32,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -1331,7 +1336,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -1340,7 +1345,7 @@
"561"
]
},
- "execution_count": 33,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1365,7 +1370,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -1374,18 +1379,18 @@
"(1020, 6)"
]
},
- "execution_count": 34,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data processing to reduce the features to 100 or less as required for TabPFN models\n",
- "X = train_har_data.drop(columns=['Activity'])\n",
+ "X = train_har_data.drop(columns=['subject','Activity'])\n",
"y = train_har_data['Activity']\n",
"scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(X)\n",
- "lda = LinearDiscriminantAnalysis(n_components=min(100, len(set(y)) - 1))\n",
+ "lda = LinearDiscriminantAnalysis() \n",
"X_reduced_lda = lda.fit_transform(X_scaled, y)\n",
"X_train_lda_df = pd.DataFrame(X_reduced_lda, columns=[f'LDA{i+1}' for i in range(X_reduced_lda.shape[1])])\n",
"X_train_lda_df['Activity'] = y.reset_index(drop=True)\n",
@@ -1394,7 +1399,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -1403,7 +1408,7 @@
"Index(['LDA1', 'LDA2', 'LDA3', 'LDA4', 'LDA5', 'Activity'], dtype='object')"
]
},
- "execution_count": 36,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -1422,7 +1427,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -1431,7 +1436,7 @@
"5"
]
},
- "execution_count": 35,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1454,7 +1459,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -1477,7 +1482,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -1513,47 +1518,47 @@
"
\n",
" | 28 | \n",
" STANDING | \n",
- " -19.431367 | \n",
- " -11.174415 | \n",
- " 0.816325 | \n",
- " -0.687153 | \n",
- " 2.809134 | \n",
+ " -19.417553 | \n",
+ " -11.243740 | \n",
+ " 0.918685 | \n",
+ " -0.649895 | \n",
+ " 2.771662 | \n",
"
\n",
" \n",
" | 130 | \n",
" SITTING | \n",
- " -18.377005 | \n",
- " -9.429310 | \n",
- " 0.342490 | \n",
- " -0.757008 | \n",
- " -4.074501 | \n",
+ " -18.471293 | \n",
+ " -9.240233 | \n",
+ " 0.375931 | \n",
+ " -0.752110 | \n",
+ " -4.075432 | \n",
"
\n",
" \n",
" | 311 | \n",
" WALKING | \n",
- " 17.852727 | \n",
- " -1.390446 | \n",
- " -8.164604 | \n",
- " 4.176480 | \n",
- " -0.160302 | \n",
+ " 17.773490 | \n",
+ " -1.438111 | \n",
+ " -8.164614 | \n",
+ " 4.168316 | \n",
+ " -0.154530 | \n",
"
\n",
" \n",
" | 734 | \n",
" WALKING_UPSTAIRS | \n",
- " 23.633640 | \n",
- " 2.301121 | \n",
- " 2.475455 | \n",
- " -10.447339 | \n",
- " -0.302447 | \n",
+ " 23.623186 | \n",
+ " 2.154564 | \n",
+ " 2.490316 | \n",
+ " -10.440684 | \n",
+ " -0.306971 | \n",
"
\n",
" \n",
" | 847 | \n",
" LAYING | \n",
- " -26.620913 | \n",
- " 14.783496 | \n",
- " -0.705847 | \n",
- " 0.511383 | \n",
- " -0.568820 | \n",
+ " -26.261936 | \n",
+ " 14.538433 | \n",
+ " -0.750472 | \n",
+ " 0.509736 | \n",
+ " -0.579957 | \n",
"
\n",
" \n",
"\n",
@@ -1561,14 +1566,14 @@
],
"text/plain": [
" Activity LDA1 LDA2 LDA3 LDA4 LDA5\n",
- "28 STANDING -19.431367 -11.174415 0.816325 -0.687153 2.809134\n",
- "130 SITTING -18.377005 -9.429310 0.342490 -0.757008 -4.074501\n",
- "311 WALKING 17.852727 -1.390446 -8.164604 4.176480 -0.160302\n",
- "734 WALKING_UPSTAIRS 23.633640 2.301121 2.475455 -10.447339 -0.302447\n",
- "847 LAYING -26.620913 14.783496 -0.705847 0.511383 -0.568820"
+ "28 STANDING -19.417553 -11.243740 0.918685 -0.649895 2.771662\n",
+ "130 SITTING -18.471293 -9.240233 0.375931 -0.752110 -4.075432\n",
+ "311 WALKING 17.773490 -1.438111 -8.164614 4.168316 -0.154530\n",
+ "734 WALKING_UPSTAIRS 23.623186 2.154564 2.490316 -10.440684 -0.306971\n",
+ "847 LAYING -26.261936 14.538433 -0.750472 0.509736 -0.579957"
]
},
- "execution_count": 38,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1596,7 +1601,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -1614,7 +1619,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -1623,7 +1628,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -1632,7 +1637,7 @@
"0.9901960784313726"
]
},
- "execution_count": 41,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1659,9 +1664,16 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 24,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -1696,51 +1708,51 @@
" \n",
" | 101 | \n",
" LAYING | \n",
- " -27.925746 | \n",
- " 17.698939 | \n",
- " -0.211993 | \n",
- " 0.137931 | \n",
- " -0.872844 | \n",
+ " -27.572224 | \n",
+ " 17.536857 | \n",
+ " -0.293677 | \n",
+ " 0.122248 | \n",
+ " -0.868357 | \n",
" LAYING | \n",
"
\n",
" \n",
" | 299 | \n",
" WALKING | \n",
- " 22.230659 | \n",
- " 0.572533 | \n",
- " -8.318169 | \n",
- " 1.275016 | \n",
- " 0.139479 | \n",
+ " 22.234214 | \n",
+ " 0.341170 | \n",
+ " -8.293393 | \n",
+ " 1.279935 | \n",
+ " 0.128431 | \n",
" WALKING | \n",
"
\n",
" \n",
" | 693 | \n",
" SITTING | \n",
- " -18.977440 | \n",
- " -10.123049 | \n",
- " 0.551960 | \n",
- " -0.028800 | \n",
- " -3.010251 | \n",
+ " -19.069578 | \n",
+ " -9.945686 | \n",
+ " 0.593088 | \n",
+ " -0.020452 | \n",
+ " -3.014706 | \n",
" SITTING | \n",
"
\n",
" \n",
" | 884 | \n",
" WALKING | \n",
- " 23.685543 | \n",
- " -0.261172 | \n",
- " -11.338481 | \n",
- " 3.464041 | \n",
- " -0.628183 | \n",
+ " 23.716562 | \n",
+ " -0.591834 | \n",
+ " -11.292542 | \n",
+ " 3.476408 | \n",
+ " -0.650639 | \n",
" WALKING | \n",
"
\n",
" \n",
" | 967 | \n",
" SITTING | \n",
- " -18.285170 | \n",
- " -11.017798 | \n",
- " 1.873937 | \n",
- " -0.702550 | \n",
- " -1.983438 | \n",
+ " -18.414386 | \n",
+ " -10.779558 | \n",
+ " 1.910474 | \n",
+ " -0.696723 | \n",
+ " -1.982649 | \n",
" SITTING | \n",
"
\n",
" \n",
@@ -1749,11 +1761,11 @@
],
"text/plain": [
" Activity LDA1 LDA2 LDA3 LDA4 LDA5 \\\n",
- "101 LAYING -27.925746 17.698939 -0.211993 0.137931 -0.872844 \n",
- "299 WALKING 22.230659 0.572533 -8.318169 1.275016 0.139479 \n",
- "693 SITTING -18.977440 -10.123049 0.551960 -0.028800 -3.010251 \n",
- "884 WALKING 23.685543 -0.261172 -11.338481 3.464041 -0.628183 \n",
- "967 SITTING -18.285170 -11.017798 1.873937 -0.702550 -1.983438 \n",
+ "101 LAYING -27.572224 17.536857 -0.293677 0.122248 -0.868357 \n",
+ "299 WALKING 22.234214 0.341170 -8.293393 1.279935 0.128431 \n",
+ "693 SITTING -19.069578 -9.945686 0.593088 -0.020452 -3.014706 \n",
+ "884 WALKING 23.716562 -0.591834 -11.292542 3.476408 -0.650639 \n",
+ "967 SITTING -18.414386 -10.779558 1.910474 -0.696723 -1.982649 \n",
"\n",
" Activity_results \n",
"101 LAYING \n",
@@ -1763,7 +1775,7 @@
"967 SITTING "
]
},
- "execution_count": 42,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1785,7 +1797,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -1798,11 +1810,11 @@
],
"source": [
"# Align tset data with the train data format \n",
- "X = test_har_data.drop(columns=['Activity'])\n",
+ "X = test_har_data.drop(columns=['subject','Activity'])\n",
"y = test_har_data['Activity']\n",
"scaler = StandardScaler()\n",
- "X_scaled = scaler.fit_transform(X)\n",
- "lda = LinearDiscriminantAnalysis(n_components=min(100, len(set(y)) - 1)) \n",
+ "X_scaled = scaler.fit_transform(X) \n",
+ "lda = LinearDiscriminantAnalysis() \n",
"X_reduced_lda = lda.fit_transform(X_scaled, y)\n",
"X_test_lda_df = pd.DataFrame(X_reduced_lda, columns=[f'LDA{i+1}' for i in range(X_reduced_lda.shape[1])])\n",
"X_test_lda_df['Activity'] = y.reset_index(drop=True)\n",
@@ -1811,7 +1823,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -1902,7 +1914,7 @@
"4 -9.596161 -6.980061 0.480017 -0.284537 1.103180 STANDING"
]
},
- "execution_count": 44,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -1920,16 +1932,24 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 27,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n"
+ ]
+ }
+ ],
"source": [
"activity_predicted_tabpfn = tabpfn_classifier.predict(X_test_lda_df, prediction_type='dataframe')"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -2033,7 +2053,7 @@
"6331 WALKING_UPSTAIRS "
]
},
- "execution_count": 46,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -2053,14 +2073,14 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Accuracy: 96.83%\n",
+ "Accuracy: 96.72%\n",
"Precision: 0.97\n",
"Recall: 0.97\n",
"F1 Score: 0.97\n",
@@ -2069,8 +2089,8 @@
" precision recall f1-score support\n",
"\n",
" LAYING 1.00 1.00 1.00 1219\n",
- " SITTING 1.00 0.83 0.91 1119\n",
- " STANDING 0.86 0.99 0.93 1197\n",
+ " SITTING 1.00 0.83 0.90 1119\n",
+ " STANDING 0.86 0.99 0.92 1197\n",
" WALKING 1.00 1.00 1.00 1031\n",
"WALKING_DOWNSTAIRS 1.00 0.99 1.00 835\n",
" WALKING_UPSTAIRS 0.99 1.00 1.00 931\n",
@@ -2114,7 +2134,7 @@
"source": [
"The performance metrics obtained from the trained TabPFN model on the test dataset of 6,332 samples indicate excellent classification quality.\n",
"\n",
- "`Accuracy (96.81%)` : The model correctly classified approximately 97% of the samples, which is a strong indication of its ability to generalize well to unseen data, despite being trained on a smaller dataset of just 1,020 samples.\n",
+ "`Accuracy (96.72%)` : The model correctly classified approximately 97% of the samples, which is a strong indication of its ability to generalize well to unseen data, despite being trained on a smaller dataset of just 1,020 samples.\n",
"\n",
"`Precision (0.97)` : Precision measures the proportion of true positive predictions among all positive predictions made by the model. A precision of 0.97 means that 97% of the predicted positive activity classes are correct, indicating that the model rarely makes false positive errors.\n",
"\n",
@@ -2158,9 +2178,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "pro3.5_LearnLesson2025",
+ "display_name": "pro3.6_certify2.4.2_24Sep2025 [conda env:conda-pro3.6_certify2.4.2_24Sep2025] *",
"language": "python",
- "name": "pro3.5_learnlesson2025"
+ "name": "conda-env-conda-pro3.6_certify2.4.2_24Sep2025-pro3.6_certify2.4.2_24sep2025"
},
"language_info": {
"codemirror_mode": {
@@ -2172,7 +2192,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.13.7"
}
},
"nbformat": 4,