diff --git a/SEBICops/Clusters.ipynb b/SEBICops/Clusters.ipynb new file mode 100644 index 0000000..1712c1e --- /dev/null +++ b/SEBICops/Clusters.ipynb @@ -0,0 +1,2225 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 38, + "id": "23b33f4a-706a-495d-a50c-cb91102e09e2", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans, DBSCAN\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import silhouette_score" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "23c03090-ccbc-4a74-99d4-8aa14fb97f7b", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "bf88dab2-1ac9-4b1b-86ec-5c3883e617db", + "metadata": {}, + "outputs": [], + "source": [ + "embedding_data = pd.read_pickle('./df_video_info.pkl')\n", + "feature_data = pd.read_feather('./sponsorTimes_filtered_videoID_level.feather')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "34f5c609-23db-4d2b-9b24-987d8aa00102", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | videoID | \n", + "fe_segment_duration_0 | \n", + "fe_segment_duration_1 | \n", + "fe_segment_duration_2 | \n", + "views_0 | \n", + "views_1 | \n", + "views_2 | \n", + "votes_0 | \n", + "votes_1 | \n", + "votes_2 | \n", + "videoDuration | \n", + "fe_ratio_segment_duration_with_total_video_duration_0 | \n", + "fe_ratio_segment_duration_with_total_video_duration_1 | \n", + "fe_ratio_segment_duration_with_total_video_duration_2 | \n", + "fe_more_than_35_perc_sponsorship_content | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "--kZomtrtIQ | \n", + "5.756 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "465.046 | \n", + "0.012377 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
1 | \n", + "-2MyBawvlts | \n", + "5.987 | \n", + "0.000 | \n", + "0.000 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "636.064 | \n", + "0.009413 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
2 | \n", + "-3AfFa0rV6Q | \n", + "24.202 | \n", + "22.070 | \n", + "41.269 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "2477.000 | \n", + "0.009771 | \n", + "0.008910 | \n", + "0.016661 | \n", + "0 | \n", + "
3 | \n", + "-3Q-k4WQTDI | \n", + "7.384 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "242.024 | \n", + "0.030509 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
4 | \n", + "-6WBAaHqT8g | \n", + "5.334 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "242.024 | \n", + "0.022039 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
5 | \n", + "-6Z1ISvWq1U | \n", + "12.278 | \n", + "6.811 | \n", + "0.000 | \n", + "6 | \n", + "5 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "779.078 | \n", + "0.015760 | \n", + "0.008742 | \n", + "0.000000 | \n", + "0 | \n", + "
6 | \n", + "-8EtKbQg4Fk | \n", + "19.938 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "384.038 | \n", + "0.051917 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
7 | \n", + "-8fTqHiG_hc | \n", + "2.320 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "32.031 | \n", + "0.072430 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
8 | \n", + "-8hK4Y5h_HQ | \n", + "20.202 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "321.032 | \n", + "0.062928 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
9 | \n", + "-B-f2-b6Nr0 | \n", + "11.167 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "473.047 | \n", + "0.023607 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
\n", + " | videoID | \n", + "channelID | \n", + "title | \n", + "published | \n", + "name | \n", + "embedding | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "2USGSuPe8SQ | \n", + "UCsNxHPbaCWL1tKw2hxGQD6g | \n", + "Stock Market के Basics, Risks और Returns - Sha... | \n", + "1.568938e+09 | \n", + "AssetYogi | \n", + "[0.026454309, -0.009652234, -0.0023297989, 0.0... | \n", + "
1 | \n", + "3PGL5pkqwVM | \n", + "UCsNxHPbaCWL1tKw2hxGQD6g | \n", + "Mutual Funds Investment Reality for Beginners ... | \n", + "1.578614e+09 | \n", + "AssetYogi | \n", + "[0.082667224, -0.10355977, -0.069015354, 0.027... | \n", + "
2 | \n", + "um42od-JW-M | \n", + "UCqW8jxh4tH1Z1sWPbkGWL4g | \n", + "Masterclass on communicating effectively | How... | \n", + "1.634342e+09 | \n", + "AkshatZayn | \n", + "[0.08096711, -0.042743586, 0.10679469, -0.0306... | \n", + "
3 | \n", + "1SyX64uQTgM | \n", + "UCqW8jxh4tH1Z1sWPbkGWL4g | \n", + "One stock that I REGRET not buying. | \n", + "1.634515e+09 | \n", + "AkshatZayn | \n", + "[-0.0051836153, -0.015406725, -0.011564667, 0.... | \n", + "
4 | \n", + "OLj9sgfQPhA | \n", + "UCUMccND2H_CVS0dMZKCPCXA | \n", + "The Economics Of Drugs 😵 | The Untold Truth Of... | \n", + "1.634429e+09 | \n", + "namaskarprasad | \n", + "[0.020754067, 0.0007845427, -0.03164388, -0.03... | \n", + "
5 | \n", + "4dZPzbdScy8 | \n", + "UCRzYN32xtBf3Yxsx5BvJWJw | \n", + "I INVESTED IN A FIXED DEPOSIT! | \n", + "1.634602e+09 | \n", + "warikoo | \n", + "[0.05586297, 0.027604798, 0.017507043, 0.07515... | \n", + "
6 | \n", + "CZxxlUf55Fo | \n", + "UCwAdQUuPT6laN-AQR17fe1g | \n", + "Financial Plan for your 1st income | \n", + "1.634170e+09 | \n", + "pranjalkamra | \n", + "[-0.013564097, 0.025042024, -0.016356125, 0.03... | \n", + "
7 | \n", + "GUlg076O89c | \n", + "UCe3qdG0A_gr-sEdat5y2twQ | \n", + "Crores of Rupees Lost? | Explained by CA Racha... | \n", + "1.634602e+09 | \n", + "CARachanaRanade | \n", + "[-0.0028898187, 0.027031748, -0.04745055, 0.07... | \n", + "
8 | \n", + "R8ZWwkbS6Ww | \n", + "UCe3qdG0A_gr-sEdat5y2twQ | \n", + "Should I own a Credit Card? Explained by CA Ra... | \n", + "1.600733e+09 | \n", + "CARachanaRanade | \n", + "[-0.0059682536, 0.024231985, -0.034983452, 0.0... | \n", + "
9 | \n", + "dqXy59i5Zd4 | \n", + "UCe3qdG0A_gr-sEdat5y2twQ | \n", + "How do I earn more interest on my money? | CA ... | \n", + "1.634688e+09 | \n", + "CARachanaRanade | \n", + "[0.044167295, 0.003461873, -0.05475514, 0.0171... | \n", + "
\n", + " | videoID | \n", + "fe_segment_duration_0 | \n", + "fe_segment_duration_1 | \n", + "fe_segment_duration_2 | \n", + "views_0 | \n", + "views_1 | \n", + "views_2 | \n", + "votes_0 | \n", + "votes_1 | \n", + "votes_2 | \n", + "videoDuration | \n", + "fe_ratio_segment_duration_with_total_video_duration_0 | \n", + "fe_ratio_segment_duration_with_total_video_duration_1 | \n", + "fe_ratio_segment_duration_with_total_video_duration_2 | \n", + "fe_more_than_35_perc_sponsorship_content | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "--kZomtrtIQ | \n", + "5.756 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "465.046 | \n", + "0.012377 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
1 | \n", + "-2MyBawvlts | \n", + "5.987 | \n", + "0.000 | \n", + "0.000 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "636.064 | \n", + "0.009413 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
2 | \n", + "-3AfFa0rV6Q | \n", + "24.202 | \n", + "22.070 | \n", + "41.269 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "2477.000 | \n", + "0.009771 | \n", + "0.008910 | \n", + "0.016661 | \n", + "0 | \n", + "
3 | \n", + "-3Q-k4WQTDI | \n", + "7.384 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "242.024 | \n", + "0.030509 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
4 | \n", + "-6WBAaHqT8g | \n", + "5.334 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "242.024 | \n", + "0.022039 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
2358 | \n", + "zwLHJlNMlf4 | \n", + "0.000 | \n", + "24.599 | \n", + "0.000 | \n", + "0 | \n", + "76 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1092.000 | \n", + "0.000000 | \n", + "0.022527 | \n", + "0.000000 | \n", + "0 | \n", + "
2359 | \n", + "zxKURXHy6es | \n", + "46.073 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "733.073 | \n", + "0.062849 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
2360 | \n", + "zxdOcHOrAdE | \n", + "0.000 | \n", + "20.980 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "61.060 | \n", + "0.000000 | \n", + "0.343596 | \n", + "0.000000 | \n", + "0 | \n", + "
2361 | \n", + "zxi7Rm-lWTg | \n", + "43.520 | \n", + "0.000 | \n", + "43.337 | \n", + "36 | \n", + "0 | \n", + "11 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "555.000 | \n", + "0.078414 | \n", + "0.000000 | \n", + "0.078085 | \n", + "0 | \n", + "
2362 | \n", + "zzJ0iHJ7_nk | \n", + "24.830 | \n", + "117.100 | \n", + "0.000 | \n", + "87 | \n", + "315 | \n", + "0 | \n", + "0 | \n", + "-1 | \n", + "0 | \n", + "605.201 | \n", + "0.041028 | \n", + "0.193489 | \n", + "0.000000 | \n", + "0 | \n", + "
2363 rows × 15 columns
\n", + "\n", + " | 143 | \n", + "
---|---|
videoID | \n", + "2USGSuPe8SQ | \n", + "
fe_segment_duration_0 | \n", + "68.680586 | \n", + "
fe_segment_duration_1 | \n", + "0.0 | \n", + "
fe_segment_duration_2 | \n", + "0.0 | \n", + "
views_0 | \n", + "151 | \n", + "
views_1 | \n", + "0 | \n", + "
views_2 | \n", + "0 | \n", + "
votes_0 | \n", + "0 | \n", + "
votes_1 | \n", + "0 | \n", + "
votes_2 | \n", + "0 | \n", + "
videoDuration | \n", + "1800.081 | \n", + "
fe_ratio_segment_duration_with_total_video_duration_0 | \n", + "0.038154 | \n", + "
fe_ratio_segment_duration_with_total_video_duration_1 | \n", + "0.0 | \n", + "
fe_ratio_segment_duration_with_total_video_duration_2 | \n", + "0.0 | \n", + "
fe_more_than_35_perc_sponsorship_content | \n", + "0 | \n", + "
\n", + " | fe_segment_duration_0 | \n", + "fe_segment_duration_1 | \n", + "fe_segment_duration_2 | \n", + "views_0 | \n", + "views_1 | \n", + "views_2 | \n", + "votes_0 | \n", + "votes_1 | \n", + "votes_2 | \n", + "videoDuration | \n", + "... | \n", + "374 | \n", + "375 | \n", + "376 | \n", + "377 | \n", + "378 | \n", + "379 | \n", + "380 | \n", + "381 | \n", + "382 | \n", + "383 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "... | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "
mean | \n", + "20.137511 | \n", + "27.236475 | \n", + "8.203986 | \n", + "14.888701 | \n", + "53.520948 | \n", + "5.762167 | \n", + "0.019467 | \n", + "-0.002539 | \n", + "0.003386 | \n", + "951.087211 | \n", + "... | \n", + "0.008716 | \n", + "0.000507 | \n", + "-0.026151 | \n", + "0.025948 | \n", + "-0.035363 | \n", + "-0.006474 | \n", + "0.054771 | \n", + "-0.058691 | \n", + "-0.036102 | \n", + "0.027509 | \n", + "
std | \n", + "42.110801 | \n", + "69.460023 | \n", + "27.732029 | \n", + "75.225058 | \n", + "318.610721 | \n", + "44.939315 | \n", + "0.210946 | \n", + "0.442256 | \n", + "0.126794 | \n", + "797.083870 | \n", + "... | \n", + "0.040685 | \n", + "0.048089 | \n", + "0.046112 | \n", + "0.040346 | \n", + "0.049848 | \n", + "0.042834 | \n", + "0.049673 | \n", + "0.055822 | \n", + "0.050280 | \n", + "0.042570 | \n", + "
min | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "-2.000000 | \n", + "-15.000000 | \n", + "-2.000000 | \n", + "12.011000 | \n", + "... | \n", + "-0.118398 | \n", + "-0.160237 | \n", + "-0.160714 | \n", + "-0.109925 | \n", + "-0.185898 | \n", + "-0.139312 | \n", + "-0.107283 | \n", + "-0.209248 | \n", + "-0.175477 | \n", + "-0.110835 | \n", + "
25% | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "447.362500 | \n", + "... | \n", + "-0.019848 | \n", + "-0.031422 | \n", + "-0.058615 | \n", + "-0.000969 | \n", + "-0.067551 | \n", + "-0.036673 | \n", + "0.021013 | \n", + "-0.100077 | \n", + "-0.072208 | \n", + "-0.001920 | \n", + "
50% | \n", + "10.074000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "832.000000 | \n", + "... | \n", + "0.008296 | \n", + "0.000408 | \n", + "-0.027543 | \n", + "0.025049 | \n", + "-0.035819 | \n", + "-0.008552 | \n", + "0.055948 | \n", + "-0.059038 | \n", + "-0.036381 | \n", + "0.028096 | \n", + "
75% | \n", + "24.063600 | \n", + "26.523500 | \n", + "0.000000 | \n", + "3.000000 | \n", + "4.500000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1185.750500 | \n", + "... | \n", + "0.035374 | \n", + "0.034574 | \n", + "0.004883 | \n", + "0.052254 | \n", + "-0.001974 | \n", + "0.022251 | \n", + "0.088571 | \n", + "-0.019058 | \n", + "-0.002650 | \n", + "0.056992 | \n", + "
max | \n", + "1114.389000 | \n", + "1101.220000 | \n", + "499.816000 | \n", + "2358.000000 | \n", + "13089.000000 | \n", + "1431.000000 | \n", + "4.000000 | \n", + "3.000000 | \n", + "2.000000 | \n", + "7811.000000 | \n", + "... | \n", + "0.148813 | \n", + "0.145608 | \n", + "0.124663 | \n", + "0.171951 | \n", + "0.135764 | \n", + "0.130991 | \n", + "0.215836 | \n", + "0.159757 | \n", + "0.138907 | \n", + "0.170253 | \n", + "
8 rows × 398 columns
\n", + "\n", + " | fe_segment_duration_0 | \n", + "fe_segment_duration_1 | \n", + "fe_segment_duration_2 | \n", + "views_0 | \n", + "views_1 | \n", + "views_2 | \n", + "votes_0 | \n", + "votes_1 | \n", + "votes_2 | \n", + "videoDuration | \n", + "... | \n", + "374 | \n", + "375 | \n", + "376 | \n", + "377 | \n", + "378 | \n", + "379 | \n", + "380 | \n", + "381 | \n", + "382 | \n", + "383 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "... | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "2363.000000 | \n", + "
mean | \n", + "0.018070 | \n", + "0.024733 | \n", + "0.016414 | \n", + "0.006314 | \n", + "0.004089 | \n", + "0.004027 | \n", + "0.019467 | \n", + "-0.002539 | \n", + "0.003386 | \n", + "0.120410 | \n", + "... | \n", + "0.008716 | \n", + "0.000507 | \n", + "-0.026151 | \n", + "0.025948 | \n", + "-0.035363 | \n", + "-0.006474 | \n", + "0.054771 | \n", + "-0.058691 | \n", + "-0.036102 | \n", + "0.027509 | \n", + "
std | \n", + "0.037788 | \n", + "0.063076 | \n", + "0.055484 | \n", + "0.031902 | \n", + "0.024342 | \n", + "0.031404 | \n", + "0.210946 | \n", + "0.442256 | \n", + "0.126794 | \n", + "0.102203 | \n", + "... | \n", + "0.040685 | \n", + "0.048089 | \n", + "0.046112 | \n", + "0.040346 | \n", + "0.049848 | \n", + "0.042834 | \n", + "0.049673 | \n", + "0.055822 | \n", + "0.050280 | \n", + "0.042570 | \n", + "
min | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "-2.000000 | \n", + "-15.000000 | \n", + "-2.000000 | \n", + "0.000000 | \n", + "... | \n", + "-0.118398 | \n", + "-0.160237 | \n", + "-0.160714 | \n", + "-0.109925 | \n", + "-0.185898 | \n", + "-0.139312 | \n", + "-0.107283 | \n", + "-0.209248 | \n", + "-0.175477 | \n", + "-0.110835 | \n", + "
25% | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.055822 | \n", + "... | \n", + "-0.019848 | \n", + "-0.031422 | \n", + "-0.058615 | \n", + "-0.000969 | \n", + "-0.067551 | \n", + "-0.036673 | \n", + "0.021013 | \n", + "-0.100077 | \n", + "-0.072208 | \n", + "-0.001920 | \n", + "
50% | \n", + "0.009040 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.105140 | \n", + "... | \n", + "0.008296 | \n", + "0.000408 | \n", + "-0.027543 | \n", + "0.025049 | \n", + "-0.035819 | \n", + "-0.008552 | \n", + "0.055948 | \n", + "-0.059038 | \n", + "-0.036381 | \n", + "0.028096 | \n", + "
75% | \n", + "0.021594 | \n", + "0.024086 | \n", + "0.000000 | \n", + "0.001272 | \n", + "0.000344 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.150499 | \n", + "... | \n", + "0.035374 | \n", + "0.034574 | \n", + "0.004883 | \n", + "0.052254 | \n", + "-0.001974 | \n", + "0.022251 | \n", + "0.088571 | \n", + "-0.019058 | \n", + "-0.002650 | \n", + "0.056992 | \n", + "
max | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "4.000000 | \n", + "3.000000 | \n", + "2.000000 | \n", + "1.000000 | \n", + "... | \n", + "0.148813 | \n", + "0.145608 | \n", + "0.124663 | \n", + "0.171951 | \n", + "0.135764 | \n", + "0.130991 | \n", + "0.215836 | \n", + "0.159757 | \n", + "0.138907 | \n", + "0.170253 | \n", + "
8 rows × 398 columns
\n", + "KMeans(n_clusters=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=3)
\n", + " | title | \n", + "videoID | \n", + "name | \n", + "label | \n", + "
---|---|---|---|---|
0 | \n", + "Managing Your Money Like the Rich! | Ankur War... | \n", + "soMHbU3Uqjk | \n", + "warikoo | \n", + "0.0 | \n", + "
1 | \n", + "Want to RETIRE before 30? WATCH THIS! | \n", + "1gWwlIgua2g | \n", + "warikoo | \n", + "0.0 | \n", + "
2 | \n", + "Investing in 2 GREAT stocks! | \n", + "kriJ0QSOf1k | \n", + "AkshatZayn | \n", + "0.0 | \n", + "
3 | \n", + "Earn ₹50,000+ Per Month without Investment | H... | \n", + "KQv1_cuIQIk | \n", + "PushkarRajThakurOfficial | \n", + "0.0 | \n", + "
4 | \n", + "How to Make Money Online with #Upstox? | Earn ... | \n", + "1vyL5fDHJG4 | \n", + "PushkarRajThakurOfficial | \n", + "0.0 | \n", + "
DBSCAN(eps=7, min_samples=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DBSCAN(eps=7, min_samples=1)
\n", + " | videoID | \n", + "channelID | \n", + "title | \n", + "published | \n", + "name | \n", + "
---|---|---|---|---|---|
0 | \n", + "2USGSuPe8SQ | \n", + "UCsNxHPbaCWL1tKw2hxGQD6g | \n", + "Stock Market के Basics, Risks और Returns - Sha... | \n", + "1.568938e+09 | \n", + "AssetYogi | \n", + "
1 | \n", + "3PGL5pkqwVM | \n", + "UCsNxHPbaCWL1tKw2hxGQD6g | \n", + "Mutual Funds Investment Reality for Beginners ... | \n", + "1.578614e+09 | \n", + "AssetYogi | \n", + "
2 | \n", + "um42od-JW-M | \n", + "UCqW8jxh4tH1Z1sWPbkGWL4g | \n", + "Masterclass on communicating effectively | How... | \n", + "1.634342e+09 | \n", + "AkshatZayn | \n", + "
3 | \n", + "1SyX64uQTgM | \n", + "UCqW8jxh4tH1Z1sWPbkGWL4g | \n", + "One stock that I REGRET not buying. | \n", + "1.634515e+09 | \n", + "AkshatZayn | \n", + "
4 | \n", + "OLj9sgfQPhA | \n", + "UCUMccND2H_CVS0dMZKCPCXA | \n", + "The Economics Of Drugs 😵 | The Untold Truth Of... | \n", + "1.634429e+09 | \n", + "namaskarprasad | \n", + "
\n", + " | videoID | \n", + "title | \n", + "original | \n", + "userID | \n", + "service | \n", + "hashedVideoID | \n", + "timeSubmitted | \n", + "UUID | \n", + "
---|---|---|---|---|---|---|---|---|
0 | \n", + "U0wTDK0VOeY | \n", + "History of Antarctica's Flag | \n", + "0 | \n", + "1ff2cdd11ed952d0c13d678413113860ff279a3d90d31c... | \n", + "YouTube | \n", + "93646c719490256e8cb43cfaa41e39534525389b0b28f5... | \n", + "1680044079895 | \n", + "07f32a99-3e2d-48a3-a247-b2b03f7cd4bc | \n", + "
\n", + " | videoID | \n", + "channelID | \n", + "title | \n", + "published | \n", + "
---|---|---|---|---|
0 | \n", + "2USGSuPe8SQ | \n", + "UCsNxHPbaCWL1tKw2hxGQD6g | \n", + "Stock Market के Basics, Risks और Returns - Sha... | \n", + "1.568938e+09 | \n", + "
\n", + " | videoID | \n", + "startTime | \n", + "endTime | \n", + "votes | \n", + "locked | \n", + "timeSubmitted | \n", + "views | \n", + "category | \n", + "actionType | \n", + "videoDuration | \n", + "hidden | \n", + "reputation | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "--kZomtrtIQ | \n", + "459.290 | \n", + "465.046 | \n", + "0 | \n", + "0 | \n", + "1666424633336 | \n", + "0 | \n", + "outro | \n", + "skip | \n", + "465.046 | \n", + "0 | \n", + "0.000000 | \n", + "
1 | \n", + "-2MyBawvlts | \n", + "26.726 | \n", + "32.713 | \n", + "0 | \n", + "0 | \n", + "1670724050394 | \n", + "1 | \n", + "filler | \n", + "skip | \n", + "636.064 | \n", + "0 | \n", + "0.000000 | \n", + "
2 | \n", + "-3AfFa0rV6Q | \n", + "1405.890 | \n", + "1447.159 | \n", + "0 | \n", + "0 | \n", + "1659371242341 | \n", + "0 | \n", + "selfpromo | \n", + "skip | \n", + "2477.000 | \n", + "0 | \n", + "1.110345 | \n", + "
3 | \n", + "-3AfFa0rV6Q | \n", + "2088.830 | \n", + "2110.900 | \n", + "0 | \n", + "0 | \n", + "1659371242341 | \n", + "0 | \n", + "sponsor | \n", + "skip | \n", + "2477.000 | \n", + "0 | \n", + "1.110345 | \n", + "
4 | \n", + "-3AfFa0rV6Q | \n", + "2366.269 | \n", + "2390.471 | \n", + "0 | \n", + "0 | \n", + "1659371242341 | \n", + "0 | \n", + "interaction | \n", + "skip | \n", + "2477.000 | \n", + "0 | \n", + "1.110345 | \n", + "
\n", + " | videoID | \n", + "startTime | \n", + "endTime | \n", + "votes | \n", + "locked | \n", + "timeSubmitted | \n", + "views | \n", + "category | \n", + "actionType | \n", + "videoDuration | \n", + "hidden | \n", + "reputation | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | \n", + "-3AfFa0rV6Q | \n", + "1405.890 | \n", + "1447.159 | \n", + "0 | \n", + "0 | \n", + "1659371242341 | \n", + "0 | \n", + "selfpromo | \n", + "skip | \n", + "2477.0 | \n", + "0 | \n", + "1.110345 | \n", + "
3 | \n", + "-3AfFa0rV6Q | \n", + "2088.830 | \n", + "2110.900 | \n", + "0 | \n", + "0 | \n", + "1659371242341 | \n", + "0 | \n", + "sponsor | \n", + "skip | \n", + "2477.0 | \n", + "0 | \n", + "1.110345 | \n", + "
4 | \n", + "-3AfFa0rV6Q | \n", + "2366.269 | \n", + "2390.471 | \n", + "0 | \n", + "0 | \n", + "1659371242341 | \n", + "0 | \n", + "interaction | \n", + "skip | \n", + "2477.0 | \n", + "0 | \n", + "1.110345 | \n", + "
\n", + " | videoID | \n", + "fe_segment_duration_0 | \n", + "fe_segment_duration_1 | \n", + "fe_segment_duration_2 | \n", + "views_0 | \n", + "views_1 | \n", + "views_2 | \n", + "votes_0 | \n", + "votes_1 | \n", + "votes_2 | \n", + "videoDuration | \n", + "fe_ratio_segment_duration_with_total_video_duration_0 | \n", + "fe_ratio_segment_duration_with_total_video_duration_1 | \n", + "fe_ratio_segment_duration_with_total_video_duration_2 | \n", + "fe_more_than_35_perc_sponsorship_content | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "--kZomtrtIQ | \n", + "5.756 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "465.046 | \n", + "0.012377 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
1 | \n", + "-2MyBawvlts | \n", + "5.987 | \n", + "0.000 | \n", + "0.000 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "636.064 | \n", + "0.009413 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
2 | \n", + "-3AfFa0rV6Q | \n", + "24.202 | \n", + "22.070 | \n", + "41.269 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "2477.000 | \n", + "0.009771 | \n", + "0.008910 | \n", + "0.016661 | \n", + "0 | \n", + "
3 | \n", + "-3Q-k4WQTDI | \n", + "7.384 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "242.024 | \n", + "0.030509 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
4 | \n", + "-6WBAaHqT8g | \n", + "5.334 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "242.024 | \n", + "0.022039 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
2358 | \n", + "zwLHJlNMlf4 | \n", + "0.000 | \n", + "24.599 | \n", + "0.000 | \n", + "0 | \n", + "76 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1092.000 | \n", + "0.000000 | \n", + "0.022527 | \n", + "0.000000 | \n", + "0 | \n", + "
2359 | \n", + "zxKURXHy6es | \n", + "46.073 | \n", + "0.000 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "733.073 | \n", + "0.062849 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0 | \n", + "
2360 | \n", + "zxdOcHOrAdE | \n", + "0.000 | \n", + "20.980 | \n", + "0.000 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "61.060 | \n", + "0.000000 | \n", + "0.343596 | \n", + "0.000000 | \n", + "0 | \n", + "
2361 | \n", + "zxi7Rm-lWTg | \n", + "43.520 | \n", + "0.000 | \n", + "43.337 | \n", + "36 | \n", + "0 | \n", + "11 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "555.000 | \n", + "0.078414 | \n", + "0.000000 | \n", + "0.078085 | \n", + "0 | \n", + "
2362 | \n", + "zzJ0iHJ7_nk | \n", + "24.830 | \n", + "117.100 | \n", + "0.000 | \n", + "87 | \n", + "315 | \n", + "0 | \n", + "0 | \n", + "-1 | \n", + "0 | \n", + "605.201 | \n", + "0.041028 | \n", + "0.193489 | \n", + "0.000000 | \n", + "0 | \n", + "
2363 rows × 15 columns
\n", + "