Whatsapp_chatAnalysis/app.py at master · Divak-ar/Whatsapp_chatAnalysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import streamlit as st
import preprocessor
import helper
import matplotlib.pyplot as plt
import seaborn as sns

# Streamlit entry script: lets the user upload an exported WhatsApp chat,
# pick a participant (or "Overall") in the sidebar, and renders statistics,
# timelines, activity maps, word clouds, emoji usage and sentiment analysis.

st.set_page_config(layout="wide")
st.sidebar.title("📊 WhatsApp Chat Analyzer")


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    Figures created with ``plt.subplots()`` stay registered with pyplot
    until they are explicitly closed. Streamlit re-executes this script on
    every interaction, so unclosed figures would accumulate in the server
    process.
    """
    st.pyplot(fig)
    plt.close(fig)


def _rotate_xticks(ax, degrees=90):
    """Rotate the x tick labels of *ax* by *degrees*.

    Works on the given axes directly instead of ``plt.xticks``, which relies
    on pyplot's global "current axes" state.
    """
    ax.tick_params(axis="x", labelrotation=degrees)


uploaded_file = st.sidebar.file_uploader("📁 Upload your chat file")
if uploaded_file is not None:
    try:
        data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # A non-UTF-8 upload would otherwise crash the app with a traceback.
        st.sidebar.error(
            "Could not read the uploaded file as UTF-8 text. "
            "Please upload the exported WhatsApp chat (.txt) file."
        )
        st.stop()

    # Use updated preprocessor (supports am/pm)
    df = preprocessor.preprocess(data)

    # Build the user selector: every distinct sender except the
    # 'group_notification' pseudo-user, sorted, with "Overall" first.
    user_list = df['user'].unique().tolist()
    if 'group_notification' in user_list:
        user_list.remove('group_notification')
    user_list.sort()
    user_list.insert(0, "Overall")

    selected_user = st.sidebar.selectbox("👤 Analyze chat for", user_list)

    # NOTE(review): a Streamlit button is only truthy on the run triggered by
    # the click. If render_sentiment_analysis_ui contains interactive widgets,
    # using them reruns the script and hides this whole analysis — confirm.
    if st.sidebar.button("🚀 Show Analysis"):

        # === Top Stats ===
        st.title("📈 Top Statistics")
        num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)

        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Total Messages", num_messages)
        col2.metric("Total Words", words)
        col3.metric("Media Shared", num_media_messages)
        col4.metric("Links Shared", num_links)

        # === Monthly Timeline ===
        st.title("📅 Monthly Timeline")
        timeline = helper.monthly_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(timeline['time'], timeline['message'], color='green')
        _rotate_xticks(ax)
        _show_figure(fig)

        # === Daily Timeline ===
        st.title("📆 Daily Timeline")
        daily_timeline = helper.daily_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
        _rotate_xticks(ax)
        _show_figure(fig)

        # === Activity Maps ===
        st.title("🗓 Weekly & Monthly Activity")
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Most Active Day")
            busy_day = helper.week_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values, color='purple')
            _rotate_xticks(ax)
            _show_figure(fig)

        with col2:
            st.subheader("Most Active Month")
            busy_month = helper.month_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color='orange')
            _rotate_xticks(ax)
            _show_figure(fig)

        # === Heatmap ===
        st.title("🔥 Weekly Activity Heatmap")
        user_heatmap = helper.activity_heatmap(selected_user, df)
        fig, ax = plt.subplots()
        sns.heatmap(user_heatmap, ax=ax)
        _show_figure(fig)

        # === Most Busy Users (group-level view only) ===
        if selected_user == 'Overall':
            st.title("🏆 Most Active Users")
            x, new_df = helper.most_busy_users(df)
            col1, col2 = st.columns(2)
            with col1:
                fig, ax = plt.subplots()
                ax.bar(x.index, x.values, color='gray')
                _rotate_xticks(ax)
                _show_figure(fig)
            with col2:
                # NOTE(review): DataFrame.rename silently ignores keys that
                # are not existing columns; verify 'Percentage' and 'count'
                # match what helper.most_busy_users actually returns.
                renamed_df = new_df.rename(columns={'Percentage': 'Name', 'count': 'Ratio'})
                st.dataframe(renamed_df)

        # === WordCloud ===
        st.title("☁️ Word Clouds")

        # One tab per word cloud variant
        wc_tab1, wc_tab2 = st.tabs(["Extended Word Cloud", "Standard Word Cloud"])

        with wc_tab1:
            st.subheader("Extended Word Cloud")
            df_wc = helper.create_wordcloud(selected_user, df)
            fig, ax = plt.subplots()
            ax.imshow(df_wc)
            ax.axis("off")
            _show_figure(fig)

        with wc_tab2:
            st.subheader("Standard Word Cloud")
            df_sw_wc = helper.create_modern_wordcloud(selected_user, df)
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.imshow(df_sw_wc)
            ax.axis("off")
            _show_figure(fig)

        # === Most Common Words ===
        st.title("🔠 Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df)
        fig, ax = plt.subplots()
        ax.barh(most_common_df['word'], most_common_df['count'], color='teal')
        _rotate_xticks(ax, 0)
        _show_figure(fig)

        # === Emoji Analysis ===
        st.title("😀 Emoji Analysis")
        emoji_df = helper.emoji_helper(selected_user, df)
        if emoji_df.empty:
            # Nothing to tabulate or plot; column lookups below could fail
            # on an empty frame.
            st.info("No emojis found for this selection.")
        else:
            col1, col2 = st.columns(2)
            with col1:
                st.dataframe(emoji_df)
            with col2:
                # Pie chart of the first five rows of the emoji table.
                fig, ax = plt.subplots()
                ax.pie(emoji_df['count'].head(), labels=emoji_df['emoji'].head(), autopct="%0.2f%%")
                _show_figure(fig)

        # === Top 10 Words Per User (group-level view only) ===
        if selected_user == 'Overall':
            st.title("🗣 Top 10 Words Per User")
            top_words = helper.top_10_words_per_user(df)
            # Each value is a sequence whose items carry the word at index 0.
            for user, user_words in top_words.items():
                st.markdown(f"**{user}**: {', '.join([w[0] for w in user_words])}")

        # === Sentiment Analysis ===
        # Rendering is delegated to the sentiment_analysis module, imported
        # here so it is only loaded when an analysis is actually requested.
        from sentiment_analysis import render_sentiment_analysis_ui
        render_sentiment_analysis_ui(df, selected_user)