diff --git a/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb b/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb index e9a0716900..6fad81d70c 100644 --- a/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb +++ b/samples/04_gis_analysts_data_scientists/classifying_human_activity_using_tabPFN_classifier.ipynb @@ -54,18 +54,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: total: 0 ns\n", - "Wall time: 1.01 ms\n" - ] - } - ], + "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", @@ -79,6 +70,20 @@ "from arcgis.learn import MLModel, prepare_tabulardata" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The TabPFN package can be installed using the following command in python command prompt:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "conda install -c esri tabpfn" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -88,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -106,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -123,8 +128,8 @@ " train_har_dataset\n", " \n", "
HAR dataset
CSV by api_data_owner\n", - "
Last Modified: January 10, 2025\n", - "
0 comments, 3 views\n", + "
Last Modified: January 11, 2025\n", + "
0 comments, 48 views\n", " \n", " \n", " " @@ -133,7 +138,7 @@ "" ] }, - "execution_count": 23, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -146,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -382,7 +387,7 @@ "[5 rows x 563 columns]" ] }, - "execution_count": 25, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -395,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -404,7 +409,7 @@ "(1020, 563)" ] }, - "execution_count": 26, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -422,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -439,8 +444,8 @@ " test_har_dataset\n", " \n", "
HAR dataset
CSV by api_data_owner\n", - "
Last Modified: January 10, 2025\n", - "
0 comments, 0 views\n", + "
Last Modified: January 11, 2025\n", + "
0 comments, 55 views\n", " \n", " \n", " " @@ -449,7 +454,7 @@ "" ] }, - "execution_count": 27, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -462,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -472,7 +477,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -698,7 +703,7 @@ "[5 rows x 563 columns]" ] }, - "execution_count": 29, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -711,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -720,7 +725,7 @@ "(6332, 563)" ] }, - "execution_count": 30, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -738,7 +743,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -749,7 +754,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 13, "metadata": { "scrolled": true }, @@ -1320,7 +1325,7 @@ " 'angle(Z,gravityMean)']" ] }, - "execution_count": 32, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1331,7 +1336,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1340,7 +1345,7 @@ "561" ] }, - "execution_count": 33, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1365,7 +1370,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1374,18 +1379,18 @@ "(1020, 6)" ] }, - "execution_count": 34, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Data processing to reduce the features to 100 or less as required for TabPFN models\n", - "X = train_har_data.drop(columns=['Activity'])\n", + "X = train_har_data.drop(columns=['subject','Activity'])\n", "y = train_har_data['Activity']\n", "scaler = StandardScaler()\n", "X_scaled = scaler.fit_transform(X)\n", - "lda = LinearDiscriminantAnalysis(n_components=min(100, len(set(y)) - 1))\n", + "lda = LinearDiscriminantAnalysis() \n", "X_reduced_lda = lda.fit_transform(X_scaled, y)\n", "X_train_lda_df = pd.DataFrame(X_reduced_lda, columns=[f'LDA{i+1}' for i in range(X_reduced_lda.shape[1])])\n", "X_train_lda_df['Activity'] = y.reset_index(drop=True)\n", @@ -1394,7 +1399,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1403,7 +1408,7 @@ "Index(['LDA1', 'LDA2', 'LDA3', 'LDA4', 'LDA5', 'Activity'], dtype='object')" ] }, - "execution_count": 36, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1422,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1431,7 +1436,7 @@ "5" ] }, - "execution_count": 35, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1454,7 +1459,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -1477,7 +1482,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1513,47 +1518,47 @@ " \n", " 28\n", " STANDING\n", - " -19.431367\n", - " -11.174415\n", - " 0.816325\n", - " -0.687153\n", - " 2.809134\n", + " -19.417553\n", + " -11.243740\n", + " 0.918685\n", + " -0.649895\n", + " 2.771662\n", " \n", " \n", " 130\n", " SITTING\n", - " -18.377005\n", - " -9.429310\n", - " 0.342490\n", - " -0.757008\n", - " -4.074501\n", + " -18.471293\n", + " -9.240233\n", + " 0.375931\n", + " -0.752110\n", + " -4.075432\n", " \n", " \n", " 311\n", " WALKING\n", - " 17.852727\n", - " -1.390446\n", - " -8.164604\n", - " 4.176480\n", - " -0.160302\n", + " 17.773490\n", + " -1.438111\n", + " -8.164614\n", + " 4.168316\n", + " -0.154530\n", " \n", " \n", " 734\n", " WALKING_UPSTAIRS\n", - " 23.633640\n", - " 2.301121\n", - " 2.475455\n", - " -10.447339\n", - " -0.302447\n", + " 23.623186\n", + " 2.154564\n", + " 2.490316\n", + " -10.440684\n", + " -0.306971\n", " \n", " \n", " 847\n", " LAYING\n", - " -26.620913\n", - " 14.783496\n", - " -0.705847\n", - " 0.511383\n", - " -0.568820\n", + " -26.261936\n", + " 14.538433\n", + " -0.750472\n", + " 0.509736\n", + " -0.579957\n", " \n", " \n", "\n", @@ -1561,14 +1566,14 @@ ], "text/plain": [ " Activity LDA1 LDA2 LDA3 LDA4 LDA5\n", - "28 STANDING -19.431367 -11.174415 0.816325 -0.687153 2.809134\n", - "130 SITTING -18.377005 -9.429310 0.342490 -0.757008 -4.074501\n", - "311 WALKING 17.852727 -1.390446 -8.164604 4.176480 -0.160302\n", - "734 WALKING_UPSTAIRS 23.633640 2.301121 2.475455 -10.447339 -0.302447\n", - "847 LAYING -26.620913 14.783496 -0.705847 0.511383 -0.568820" + "28 STANDING -19.417553 -11.243740 0.918685 -0.649895 2.771662\n", + "130 SITTING -18.471293 -9.240233 0.375931 -0.752110 -4.075432\n", + "311 WALKING 17.773490 -1.438111 -8.164614 4.168316 -0.154530\n", + "734 WALKING_UPSTAIRS 23.623186 2.154564 2.490316 -10.440684 -0.306971\n", + "847 LAYING -26.261936 14.538433 -0.750472 0.509736 -0.579957" ] }, - "execution_count": 38, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1596,7 +1601,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -1614,7 +1619,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1623,7 +1628,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1632,7 +1637,7 @@ "0.9901960784313726" ] }, - "execution_count": 41, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1659,9 +1664,16 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 24, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n" + ] + }, { "data": { "text/html": [ @@ -1696,51 +1708,51 @@ " \n", " 101\n", " LAYING\n", - " -27.925746\n", - " 17.698939\n", - " -0.211993\n", - " 0.137931\n", - " -0.872844\n", + " -27.572224\n", + " 17.536857\n", + " -0.293677\n", + " 0.122248\n", + " -0.868357\n", " LAYING\n", " \n", " \n", " 299\n", " WALKING\n", - " 22.230659\n", - " 0.572533\n", - " -8.318169\n", - " 1.275016\n", - " 0.139479\n", + " 22.234214\n", + " 0.341170\n", + " -8.293393\n", + " 1.279935\n", + " 0.128431\n", " WALKING\n", " \n", " \n", " 693\n", " SITTING\n", - " -18.977440\n", - " -10.123049\n", - " 0.551960\n", - " -0.028800\n", - " -3.010251\n", + " -19.069578\n", + " -9.945686\n", + " 0.593088\n", + " -0.020452\n", + " -3.014706\n", " SITTING\n", " \n", " \n", " 884\n", " WALKING\n", - " 23.685543\n", - " -0.261172\n", - " -11.338481\n", - " 3.464041\n", - " -0.628183\n", + " 23.716562\n", + " -0.591834\n", + " -11.292542\n", + " 3.476408\n", + " -0.650639\n", " WALKING\n", " \n", " \n", " 967\n", " SITTING\n", - " -18.285170\n", - " -11.017798\n", - " 1.873937\n", - " -0.702550\n", - " -1.983438\n", + " -18.414386\n", + " -10.779558\n", + " 1.910474\n", + " -0.696723\n", + " -1.982649\n", " SITTING\n", " \n", " \n", @@ -1749,11 +1761,11 @@ ], "text/plain": [ " Activity LDA1 LDA2 LDA3 LDA4 LDA5 \\\n", - "101 LAYING -27.925746 17.698939 -0.211993 0.137931 -0.872844 \n", - "299 WALKING 22.230659 0.572533 -8.318169 1.275016 0.139479 \n", - "693 SITTING -18.977440 -10.123049 0.551960 -0.028800 -3.010251 \n", - "884 WALKING 23.685543 -0.261172 -11.338481 3.464041 -0.628183 \n", - "967 SITTING -18.285170 -11.017798 1.873937 -0.702550 -1.983438 \n", + "101 LAYING -27.572224 17.536857 -0.293677 0.122248 -0.868357 \n", + "299 WALKING 22.234214 0.341170 -8.293393 1.279935 0.128431 \n", + "693 SITTING -19.069578 -9.945686 0.593088 -0.020452 -3.014706 \n", + "884 WALKING 23.716562 -0.591834 -11.292542 3.476408 -0.650639 \n", + "967 SITTING -18.414386 -10.779558 1.910474 -0.696723 -1.982649 \n", "\n", " Activity_results \n", "101 LAYING \n", @@ -1763,7 +1775,7 @@ "967 SITTING " ] }, - "execution_count": 42, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1785,7 +1797,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1798,11 +1810,11 @@ ], "source": [ "# Align tset data with the train data format \n", - "X = test_har_data.drop(columns=['Activity'])\n", + "X = test_har_data.drop(columns=['subject','Activity'])\n", "y = test_har_data['Activity']\n", "scaler = StandardScaler()\n", - "X_scaled = scaler.fit_transform(X)\n", - "lda = LinearDiscriminantAnalysis(n_components=min(100, len(set(y)) - 1)) \n", + "X_scaled = scaler.fit_transform(X) \n", + "lda = LinearDiscriminantAnalysis() \n", "X_reduced_lda = lda.fit_transform(X_scaled, y)\n", "X_test_lda_df = pd.DataFrame(X_reduced_lda, columns=[f'LDA{i+1}' for i in range(X_reduced_lda.shape[1])])\n", "X_test_lda_df['Activity'] = y.reset_index(drop=True)\n", @@ -1811,7 +1823,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1902,7 +1914,7 @@ "4 -9.596161 -6.980061 0.480017 -0.284537 1.103180 STANDING" ] }, - "execution_count": 44, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1920,16 +1932,24 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n" + ] + } + ], "source": [ "activity_predicted_tabpfn = tabpfn_classifier.predict(X_test_lda_df, prediction_type='dataframe')" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -2033,7 +2053,7 @@ "6331 WALKING_UPSTAIRS " ] }, - "execution_count": 46, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -2053,14 +2073,14 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Accuracy: 96.83%\n", + "Accuracy: 96.72%\n", "Precision: 0.97\n", "Recall: 0.97\n", "F1 Score: 0.97\n", @@ -2069,8 +2089,8 @@ " precision recall f1-score support\n", "\n", " LAYING 1.00 1.00 1.00 1219\n", - " SITTING 1.00 0.83 0.91 1119\n", - " STANDING 0.86 0.99 0.93 1197\n", + " SITTING 1.00 0.83 0.90 1119\n", + " STANDING 0.86 0.99 0.92 1197\n", " WALKING 1.00 1.00 1.00 1031\n", "WALKING_DOWNSTAIRS 1.00 0.99 1.00 835\n", " WALKING_UPSTAIRS 0.99 1.00 1.00 931\n", @@ -2114,7 +2134,7 @@ "source": [ "The performance metrics obtained from the trained TabPFN model on the test dataset of 6,332 samples indicate excellent classification quality.\n", "\n", - "`Accuracy (96.81%)` : The model correctly classified approximately 97% of the samples, which is a strong indication of its ability to generalize well to unseen data, despite being trained on a smaller dataset of just 1,020 samples.\n", + "`Accuracy (96.72%)` : The model correctly classified approximately 97% of the samples, which is a strong indication of its ability to generalize well to unseen data, despite being trained on a smaller dataset of just 1,020 samples.\n", "\n", "`Precision (0.97)` : Precision measures the proportion of true positive predictions among all positive predictions made by the model. A precision of 0.97 means that 97% of the predicted positive activity classes are correct, indicating that the model rarely makes false positive errors.\n", "\n", @@ -2158,9 +2178,9 @@ ], "metadata": { "kernelspec": { - "display_name": "pro3.5_LearnLesson2025", + "display_name": "pro3.6_certify2.4.2_24Sep2025 [conda env:conda-pro3.6_certify2.4.2_24Sep2025] *", "language": "python", - "name": "pro3.5_learnlesson2025" + "name": "conda-env-conda-pro3.6_certify2.4.2_24Sep2025-pro3.6_certify2.4.2_24sep2025" }, "language_info": { "codemirror_mode": { @@ -2172,7 +2192,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.13.7" } }, "nbformat": 4,