Skip to content

Commit ad60943

Browse files
committed
Refactor: eliminate code duplication
Create shared analytics utilities - Create analytics_utils.py module with 11 reusable data processing functions - Refactor app.py to use shared utilities, removing ~200 lines of duplicated code - Update send_weekly_digest.py to leverage shared data processing functions - Consolidate form validation logic into helper functions - Improve error handling in pivot table creation and statistics calculation - Enhance code maintainability by centralizing analytics logic This refactoring eliminates significant duplication between general statistics and manager-filtered views while maintaining all existing functionality.
1 parent ff10928 commit ad60943

File tree

3 files changed

+486
-251
lines changed

3 files changed

+486
-251
lines changed

analytics_utils.py

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
"""
2+
Shared analytics utilities for AI Usage Log application.
3+
Contains common data processing and statistics calculation functions.
4+
"""
5+
import pandas as pd
6+
from datetime import datetime, timedelta
7+
8+
9+
def prepare_dataframe(entries, workflow_impact_map=None, task_complexity_map=None):
    """
    Prepare and clean the dataframe for analysis.

    Args:
        entries: List of entry dictionaries
        workflow_impact_map: Optional mapping for workflow impact reverse lookup
        task_complexity_map: Optional mapping for task complexity reverse lookup

    Returns:
        Cleaned pandas DataFrame (empty input yields an empty DataFrame)
    """
    df = pd.DataFrame(entries)
    if df.empty:
        return df

    # Apply reverse mappings if provided; values absent from the map are
    # kept as-is via the fillna fallback.
    if workflow_impact_map and 'Workflow Impact' in df.columns:
        df['Workflow Impact'] = df['Workflow Impact'].map(workflow_impact_map).fillna(df['Workflow Impact'])
    if task_complexity_map and 'Task Complexity' in df.columns:
        df['Task Complexity'] = df['Task Complexity'].map(task_complexity_map).fillna(df['Task Complexity'])

    # Derive "Time Saved" only when both source columns exist — the original
    # indexed them unconditionally and raised KeyError on partial entries.
    if "Time Without AI" in df.columns and "Duration" in df.columns:
        df["Time Saved"] = df["Time Without AI"] - df["Duration"]

    # Coerce unparseable timestamps to NaT rather than raising; skip cleanly
    # when no Timestamp column is present.
    if "Timestamp" in df.columns:
        df["Timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce")

    return df
36+
37+
38+
def filter_last_n_days(df, days=7):
    """
    Return only the rows whose Timestamp falls within the last *days* days.

    Args:
        df: pandas DataFrame with a Timestamp column
        days: Number of days to include (default: 7)

    Returns:
        Filtered pandas DataFrame (unchanged when empty or lacking Timestamp)
    """
    # Nothing to filter on — hand the frame back untouched.
    if df.empty or 'Timestamp' not in df.columns:
        return df

    cutoff = datetime.now() - timedelta(days=days)
    recent_mask = df["Timestamp"] >= cutoff
    return df[recent_mask]
54+
55+
56+
def calculate_basic_stats(df):
57+
"""
58+
Calculate basic statistics from the dataframe.
59+
60+
Args:
61+
df: pandas DataFrame with usage data
62+
63+
Returns:
64+
Dictionary containing basic statistics
65+
"""
66+
if df.empty:
67+
return {}
68+
69+
stats = {
70+
'total_entries': len(df),
71+
'avg_time_saved': df["Time Saved"].mean() if "Time Saved" in df.columns else 0,
72+
'avg_duration': df["Duration"].mean() if "Duration" in df.columns else 0,
73+
'avg_satisfaction': df["Satisfaction"].mean() if "Satisfaction" in df.columns else 0,
74+
}
75+
76+
# Tool-specific stats
77+
if "AI Tool" in df.columns:
78+
stats['avg_duration_per_tool'] = df.groupby("AI Tool")["Duration"].mean().to_dict()
79+
stats['total_duration_per_tool'] = df.groupby("AI Tool")["Duration"].sum().to_dict()
80+
81+
# Purpose distribution
82+
if "Purpose" in df.columns:
83+
stats['purpose_distribution'] = df['Purpose'].value_counts().to_dict()
84+
85+
return stats
86+
87+
88+
def create_pivot_table(df, index, columns, values, aggfunc="mean"):
    """
    Create a pivot table, returning None instead of raising on failure.

    Args:
        df: pandas DataFrame
        index: Column for pivot table index
        columns: Column for pivot table columns
        values: Column for pivot table values
        aggfunc: Aggregation function (default: "mean")

    Returns:
        Pivot table DataFrame, or None if any column is missing or
        pivoting fails.
    """
    # Bail out early when any requested column is absent.
    if not all(name in df.columns for name in (index, columns, values)):
        return None
    try:
        return df.pivot_table(
            index=index, columns=columns, values=values, aggfunc=aggfunc
        )
    except Exception:
        # Best-effort by design: callers treat None as "no pivot available".
        return None
113+
114+
115+
def calculate_tool_effectiveness(df):
    """
    Calculate tool effectiveness metrics.

    Args:
        df: pandas DataFrame with usage data

    Returns:
        DataFrame with per-tool average time saved / satisfaction and the
        most common workflow impact; empty DataFrame when "AI Tool" or all
        metric columns are missing.
    """
    if df.empty or "AI Tool" not in df.columns:
        return pd.DataFrame()

    def _mode_or_none(series):
        # value_counts() drops NaN, so guard on the counts, not the raw
        # series: the original used `if not x.empty` and raised IndexError
        # for groups containing only NaN values.
        counts = series.value_counts()
        return counts.index[0] if len(counts) else None

    agg_dict = {}
    if "Time Saved" in df.columns:
        agg_dict["Time Saved"] = "mean"
    if "Satisfaction" in df.columns:
        agg_dict["Satisfaction"] = "mean"
    if "Workflow Impact" in df.columns:
        agg_dict["Workflow Impact"] = _mode_or_none

    if not agg_dict:
        return pd.DataFrame()

    tool_stats = df.groupby("AI Tool").agg(agg_dict).reset_index()

    # Rename columns for clarity
    tool_stats.rename(columns={
        "Time Saved": "Avg Time Saved",
        "Satisfaction": "Avg Satisfaction",
        "Workflow Impact": "Most Common Workflow Impact",
    }, inplace=True)

    return tool_stats
150+
151+
152+
def calculate_complexity_analysis(df):
    """
    Calculate task complexity analysis.

    Args:
        df: pandas DataFrame with usage data

    Returns:
        DataFrame of average time saved / satisfaction per complexity level,
        or an empty DataFrame when the needed columns are absent.
    """
    if df.empty or "Task Complexity" not in df.columns:
        return pd.DataFrame()

    # Aggregate only the metric columns actually present in the frame.
    metrics = {
        column: "mean"
        for column in ("Time Saved", "Satisfaction")
        if column in df.columns
    }
    if not metrics:
        return pd.DataFrame()

    result = df.groupby("Task Complexity").agg(metrics).reset_index()

    # Rename columns for clarity
    return result.rename(columns={
        "Time Saved": "Avg Time Saved",
        "Satisfaction": "Avg Satisfaction",
    })
184+
185+
186+
def calculate_manager_insights(df):
    """
    Calculate manager/team insights.

    Args:
        df: pandas DataFrame with usage data

    Returns:
        DataFrame with task counts and average metrics per manager; empty
        DataFrame when "Manager" (or every aggregatable column) is missing.
    """
    if df.empty or "Manager" not in df.columns:
        return pd.DataFrame()

    agg_dict = {}
    # Count of tasks per manager. The original hard-coded {"Duration": "count"}
    # and raised KeyError on frames without a Duration column.
    if "Duration" in df.columns:
        agg_dict["Duration"] = "count"
    if "Time Saved" in df.columns:
        agg_dict["Time Saved"] = "mean"
    if "Satisfaction" in df.columns:
        agg_dict["Satisfaction"] = "mean"

    if not agg_dict:
        return pd.DataFrame()

    manager_stats = df.groupby("Manager").agg(agg_dict).reset_index()

    # Rename columns for clarity
    manager_stats.rename(columns={
        "Duration": "# Tasks",
        "Time Saved": "Avg Time Saved",
        "Satisfaction": "Avg Satisfaction",
    }, inplace=True)

    return manager_stats
216+
217+
218+
def calculate_purpose_insights(df):
    """
    Calculate purpose-based insights.

    Args:
        df: pandas DataFrame with usage data

    Returns:
        DataFrame with task counts, average metrics, and the most common
        workflow impact per purpose; empty DataFrame when "Purpose" (or
        every aggregatable column) is missing.
    """
    if df.empty or "Purpose" not in df.columns:
        return pd.DataFrame()

    def _mode_or_none(series):
        # value_counts() drops NaN, so guard on the counts, not the raw
        # series: the original used `if not x.empty` and raised IndexError
        # for groups containing only NaN values.
        counts = series.value_counts()
        return counts.index[0] if len(counts) else None

    agg_dict = {}
    # Count of tasks per purpose. The original hard-coded {"Duration": "count"}
    # and raised KeyError on frames without a Duration column.
    if "Duration" in df.columns:
        agg_dict["Duration"] = "count"
    if "Time Saved" in df.columns:
        agg_dict["Time Saved"] = "mean"
    if "Satisfaction" in df.columns:
        agg_dict["Satisfaction"] = "mean"
    if "Workflow Impact" in df.columns:
        agg_dict["Workflow Impact"] = _mode_or_none

    if not agg_dict:
        return pd.DataFrame()

    purpose_stats = df.groupby("Purpose").agg(agg_dict).reset_index()

    # Rename columns for clarity
    purpose_stats.rename(columns={
        "Duration": "# Tasks",
        "Time Saved": "Avg Time Saved",
        "Satisfaction": "Avg Satisfaction",
        "Workflow Impact": "Most Common Workflow Impact",
    }, inplace=True)

    return purpose_stats

0 commit comments

Comments
 (0)