Skip to content

Commit 1d30bbf

Browse files
samuelclay and claude committed
Switch web feed analysis to Haiku and add web feed usage metrics to Grafana
Web feed LLM analysis was using Opus ($9.50/day) for XPath extraction — a structured task that Haiku handles well (~$0.50/day). Also adds full web feed usage tracking: Redis counters for analyses, subscriptions, hint refines, variant selections, and success/failure rates, exposed via Prometheus endpoint with 13 new Grafana dashboard panels. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent dc5a296 commit 1d30bbf

File tree

9 files changed

+1153
-17
lines changed

9 files changed

+1153
-17
lines changed

apps/monitor/urls.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,5 @@
5555
url(r"^user-searches?$", UserSearches.as_view(), name="user_searches"),
5656
url(r"^trending-feeds?$", TrendingFeeds.as_view(), name="trending_feeds"),
5757
url(r"^trending-subscriptions?$", TrendingSubscriptions.as_view(), name="trending_subscriptions"),
58+
url(r"^webfeed-usage?$", WebFeedUsage.as_view(), name="webfeed_usage"),
5859
]

apps/monitor/views/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@
2323
from apps.monitor.views.newsblur_updates import Updates
2424
from apps.monitor.views.newsblur_user_searches import UserSearches
2525
from apps.monitor.views.newsblur_users import Users
26+
from apps.monitor.views.newsblur_webfeed_usage import WebFeedUsage
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import time
2+
3+
from django.shortcuts import render
4+
from django.views import View
5+
6+
from apps.statistics.rtrending_webfeeds import RTrendingWebFeed
7+
from apps.webfeed.models import MWebFeedConfig
8+
9+
10+
class WebFeedUsage(View):
    def get(self, request):
        """
        Prometheus metrics endpoint for web feed usage.

        Tracks analyses, subscriptions, hint refinements, variant selections,
        success/failure rates, and overall funnel conversion.
        """
        started = time.time()

        chart_name = "webfeed_usage"
        chart_type = "gauge"
        samples = {}

        stats = RTrendingWebFeed.get_stats_for_prometheus()

        # Simple counters: one gauge sample per stat key.
        counter_keys = (
            "analyses_today",
            "analyses_with_hint_today",
            "reanalyses_today",
            "subscriptions_today",
            "unique_urls_analyzed_today",
            "unique_urls_subscribed_today",
            "unique_users_analyzing_today",
            "unique_users_subscribing_today",
            "analysis_success_today",
            "analysis_fail_today",
            "conversion_rate_pct",
        )
        samples.update(
            {key: f'{chart_name}{{metric="{key}"}} {stats[key]}' for key in counter_keys}
        )

        # Variant choices: variants 0-4, defaulting to 0 when a variant was never picked.
        choices = stats["variant_choices"]
        for idx in range(5):
            samples[f"variant_{idx}"] = (
                f'{chart_name}{{metric="variant_choice",variant="{idx}"}} {choices.get(str(idx), 0)}'
            )

        # Daily totals for the 7-day chart, one sample per metric per day.
        for day, analyses, subs, users in RTrendingWebFeed.get_daily_totals(days=7):
            samples[f"daily_analyses_{day}"] = (
                f'{chart_name}{{metric="daily_analyses",date="{day}"}} {analyses}'
            )
            samples[f"daily_subs_{day}"] = (
                f'{chart_name}{{metric="daily_subscriptions",date="{day}"}} {subs}'
            )
            samples[f"daily_users_{day}"] = (
                f'{chart_name}{{metric="daily_unique_users",date="{day}"}} {users}'
            )

        # MongoDB counts for total active web feeds; report 0 rather than failing
        # the whole scrape when Mongo is unavailable.
        try:
            total_active = MWebFeedConfig.objects.count()
            needs_reanalysis = MWebFeedConfig.objects.filter(needs_reanalysis=True).count()
        except Exception:
            total_active = 0
            needs_reanalysis = 0

        samples["total_active_webfeeds"] = (
            f'{chart_name}{{metric="total_active_webfeeds"}} {total_active}'
        )
        samples["webfeeds_needing_reanalysis"] = (
            f'{chart_name}{{metric="webfeeds_needing_reanalysis"}} {needs_reanalysis}'
        )

        # Self-measurement of how long this scrape took, in milliseconds.
        elapsed_ms = (time.time() - started) * 1000
        samples["scrape_duration"] = f'{chart_name}{{metric="scrape_duration_ms"}} {elapsed_ms:.1f}'

        return render(
            request,
            "monitor/prometheus_data.html",
            {"data": samples, "chart_name": chart_name, "chart_type": chart_type},
            content_type="text/plain",
        )
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
import datetime
2+
import hashlib
3+
4+
import redis
5+
from django.conf import settings
6+
7+
8+
class RTrendingWebFeed:
    """
    Tracks web feed usage events: analyses, subscriptions, hint refinements,
    variant selections, and re-analyses.

    Redis Key Structure:
    - wfAna:{date} -> sorted set {url_hash: count} for analyses
    - wfSub:{date} -> sorted set {url_hash: count} for subscriptions
    - wfAnaUsers:{date} -> set of user IDs who analyzed
    - wfSubUsers:{date} -> set of user IDs who subscribed
    - wfHints:{date} -> integer counter for hint/refine analyses
    - wfReanalyze:{date} -> integer counter for re-analyses
    - wfVariant:{date} -> sorted set {variant_index: count}
    - wfAnaSuccess:{date} -> integer counter for successful analyses
    - wfAnaFail:{date} -> integer counter for failed analyses

    All keys expire after 35 days.
    """

    TTL_DAYS = 35

    @classmethod
    def _redis(cls):
        # Connections are cheap here: they share the statistics pool.
        return redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)

    @classmethod
    def _today(cls):
        # Server-local date, formatted to match the per-day key suffixes.
        return datetime.date.today().strftime("%Y-%m-%d")

    @classmethod
    def _ttl(cls):
        # Key TTL in seconds.
        return cls.TTL_DAYS * 24 * 60 * 60

    @classmethod
    def _url_hash(cls, url):
        # 16 hex chars (64 bits) keeps keys compact; collisions are tolerable
        # for aggregate statistics.
        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:16]

    @classmethod
    def record_analysis(cls, user_id, url, has_hint=False):
        """Record one page analysis by `user_id`, optionally with a story hint."""
        r = cls._redis()
        today = cls._today()
        ttl = cls._ttl()
        url_hash = cls._url_hash(url)

        pipe = r.pipeline()
        pipe.zincrby(f"wfAna:{today}", 1, url_hash)
        pipe.expire(f"wfAna:{today}", ttl)
        pipe.sadd(f"wfAnaUsers:{today}", str(user_id))
        pipe.expire(f"wfAnaUsers:{today}", ttl)
        if has_hint:
            pipe.incr(f"wfHints:{today}")
            pipe.expire(f"wfHints:{today}", ttl)
        pipe.execute()

    @classmethod
    def record_reanalysis(cls, user_id):
        """Record a re-analysis; the user also counts as an analyzing user today."""
        r = cls._redis()
        today = cls._today()
        ttl = cls._ttl()

        pipe = r.pipeline()
        pipe.incr(f"wfReanalyze:{today}")
        pipe.expire(f"wfReanalyze:{today}", ttl)
        pipe.sadd(f"wfAnaUsers:{today}", str(user_id))
        pipe.expire(f"wfAnaUsers:{today}", ttl)
        pipe.execute()

    @classmethod
    def record_analysis_result(cls, success=True):
        """Record whether an analysis ultimately succeeded or failed."""
        r = cls._redis()
        today = cls._today()
        ttl = cls._ttl()

        key = f"wfAnaSuccess:{today}" if success else f"wfAnaFail:{today}"
        pipe = r.pipeline()
        pipe.incr(key)
        pipe.expire(key, ttl)
        pipe.execute()

    @classmethod
    def record_subscription(cls, user_id, url, variant_index):
        """Record a subscription to `url`, noting which variant the user chose."""
        r = cls._redis()
        today = cls._today()
        ttl = cls._ttl()
        url_hash = cls._url_hash(url)

        pipe = r.pipeline()
        pipe.zincrby(f"wfSub:{today}", 1, url_hash)
        pipe.expire(f"wfSub:{today}", ttl)
        pipe.sadd(f"wfSubUsers:{today}", str(user_id))
        pipe.expire(f"wfSubUsers:{today}", ttl)
        pipe.zincrby(f"wfVariant:{today}", 1, str(variant_index))
        pipe.expire(f"wfVariant:{today}", ttl)
        pipe.execute()

    @classmethod
    def get_daily_totals(cls, days=7):
        """
        Get daily totals for analyses, subscriptions, and unique users.

        Returns a list of (date_str, analyses, subscriptions, unique_users)
        tuples, most recent day first. All `days` days are fetched in a single
        Redis pipeline (one round trip) instead of one pipeline per day.
        """
        r = cls._redis()
        today = datetime.date.today()
        day_keys = [
            (today - datetime.timedelta(days=i)).strftime("%Y-%m-%d") for i in range(days)
        ]

        pipe = r.pipeline()
        for day in day_keys:
            pipe.zrange(f"wfAna:{day}", 0, -1, withscores=True)
            pipe.zrange(f"wfSub:{day}", 0, -1, withscores=True)
            pipe.scard(f"wfAnaUsers:{day}")
        vals = pipe.execute()

        results = []
        for idx, day in enumerate(day_keys):
            # Each day contributed 3 pipeline replies, in order.
            ana, sub, unique_users = vals[idx * 3 : idx * 3 + 3]
            analyses = sum(int(s) for _, s in ana)
            subscriptions = sum(int(s) for _, s in sub)
            results.append((day, analyses, subscriptions, unique_users))

        return results

    @classmethod
    def get_stats_for_prometheus(cls):
        """
        Return today's stats as a dict keyed for the Prometheus endpoint.

        All counters are fetched in one pipeline; missing keys (nothing
        recorded yet today) read back as None and are coerced to 0.
        """
        r = cls._redis()
        today = cls._today()

        pipe = r.pipeline()
        pipe.zrange(f"wfAna:{today}", 0, -1, withscores=True)  # 0: analyses
        pipe.zrange(f"wfSub:{today}", 0, -1, withscores=True)  # 1: subscriptions
        pipe.scard(f"wfAnaUsers:{today}")  # 2: unique analyzing users
        pipe.scard(f"wfSubUsers:{today}")  # 3: unique subscribing users
        pipe.get(f"wfHints:{today}")  # 4: hints
        pipe.get(f"wfReanalyze:{today}")  # 5: re-analyses
        pipe.zrange(f"wfVariant:{today}", 0, -1, withscores=True)  # 6: variant choices
        pipe.get(f"wfAnaSuccess:{today}")  # 7: successes
        pipe.get(f"wfAnaFail:{today}")  # 8: failures
        vals = pipe.execute()

        analyses_total = sum(int(s) for _, s in vals[0])
        unique_urls_analyzed = len(vals[0])
        subscriptions_total = sum(int(s) for _, s in vals[1])
        unique_urls_subscribed = len(vals[1])
        unique_users_analyzing = vals[2]
        unique_users_subscribing = vals[3]
        hints = int(vals[4] or 0)
        reanalyses = int(vals[5] or 0)
        # Sorted-set members may come back as bytes depending on client config.
        variant_choices = {
            (v.decode() if isinstance(v, bytes) else v): int(s) for v, s in vals[6]
        }
        successes = int(vals[7] or 0)
        failures = int(vals[8] or 0)

        # Funnel conversion: subscriptions as a percentage of analyses.
        conversion_pct = 0
        if analyses_total > 0:
            conversion_pct = round(subscriptions_total / analyses_total * 100, 1)

        return {
            "analyses_today": analyses_total,
            "analyses_with_hint_today": hints,
            "reanalyses_today": reanalyses,
            "subscriptions_today": subscriptions_total,
            "unique_urls_analyzed_today": unique_urls_analyzed,
            "unique_urls_subscribed_today": unique_urls_subscribed,
            "unique_users_analyzing_today": unique_users_analyzing,
            "unique_users_subscribing_today": unique_users_subscribing,
            "analysis_success_today": successes,
            "analysis_fail_today": failures,
            "variant_choices": variant_choices,
            "conversion_rate_pct": conversion_pct,
        }

apps/webfeed/tasks.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,11 +307,14 @@ def publish(event_type, extra=None):
307307
logging.user(user, f"~BB~FWWeb Feed: Fetching page ~SB{url}~SN")
308308

309309
# Step 1: Fetch page HTML
310+
from apps.statistics.rtrending_webfeeds import RTrendingWebFeed
311+
310312
page_html = fetch_page_html(url)
311313
if not page_html:
312314
error_msg = "Could not fetch the page. The site may be blocking requests."
313315
publish_event("error", {"error": error_msg})
314316
logging.user(user, f"~BB~FWWeb Feed: ~FR~SBFetch failed~SN~FW for ~SB{url}~SN")
317+
RTrendingWebFeed.record_analysis_result(success=False)
315318
return {"code": -1, "message": error_msg}
316319

317320
html_hash = hashlib.sha256(page_html[:10000].encode("utf-8", errors="replace")).hexdigest()[:16]
@@ -335,10 +338,10 @@ def publish(event_type, extra=None):
335338
publish_event("progress", {"message": "Finding story patterns..."})
336339

337340
# Step 2: Call Claude for XPath analysis
338-
from apps.ask_ai.providers import LLM_EXCEPTIONS, get_provider
341+
from apps.ask_ai.providers import LLM_EXCEPTIONS, get_briefing_provider
339342

340343
messages = get_analysis_messages(url, cleaned_html, story_hint=story_hint)
341-
provider, model_id, _ = get_provider("opus")
344+
provider, model_id = get_briefing_provider("haiku")
342345

343346
if not provider.is_configured():
344347
error_msg = "Anthropic API key not configured"
@@ -379,11 +382,13 @@ def publish(event_type, extra=None):
379382
error_msg = "Failed to parse AI response. Please try again."
380383
publish_event("error", {"error": error_msg})
381384
logging.user(user, f"~BB~FWWeb Feed: ~FR~SBJSON parse failed~SN~FW: {response_text[:200]}")
385+
RTrendingWebFeed.record_analysis_result(success=False)
382386
return {"code": -1, "message": error_msg}
383387

384388
if not isinstance(variants, list) or len(variants) == 0:
385389
error_msg = "No story patterns found on this page."
386390
publish_event("error", {"error": error_msg})
391+
RTrendingWebFeed.record_analysis_result(success=False)
387392
return {"code": -1, "message": error_msg}
388393

389394
# Extract page title
@@ -447,6 +452,8 @@ def publish(event_type, extra=None):
447452
)
448453
publish_event("complete")
449454

455+
RTrendingWebFeed.record_analysis_result(success=True)
456+
450457
return {
451458
"code": 1,
452459
"message": "Analysis complete",
@@ -460,6 +467,12 @@ def publish(event_type, extra=None):
460467
publish_event("error", {"error": error_msg})
461468
if user:
462469
logging.user(user, f"~BB~FWWeb Feed: ~FR~SBLLM error~SN~FW - {e}")
470+
try:
471+
from apps.statistics.rtrending_webfeeds import RTrendingWebFeed
472+
473+
RTrendingWebFeed.record_analysis_result(success=False)
474+
except Exception:
475+
pass
463476
return {"code": -1, "message": error_msg}
464477

465478
except Exception as e:
@@ -468,4 +481,10 @@ def publish(event_type, extra=None):
468481
publish_event("error", {"error": error_msg})
469482
if user:
470483
logging.user(user, f"~BB~FWWeb Feed: ~FR~SBUnexpected error~SN~FW - {e}")
484+
try:
485+
from apps.statistics.rtrending_webfeeds import RTrendingWebFeed
486+
487+
RTrendingWebFeed.record_analysis_result(success=False)
488+
except Exception:
489+
pass
471490
return {"code": -1, "message": error_msg}

apps/webfeed/views.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from utils.user_functions import ajax_login_required
1010
from utils.view_functions import required_params
1111

12+
from apps.statistics.rtrending_webfeeds import RTrendingWebFeed
13+
1214
from .models import MWebFeedConfig
1315
from .tasks import AnalyzeWebFeedPage
1416

@@ -40,6 +42,8 @@ def analyze(request):
4042
f"~BB~FWWeb Feed: Analyzing ~SB{url}~SN" + (f" (hint: {story_hint})" if story_hint else ""),
4143
)
4244

45+
RTrendingWebFeed.record_analysis(request.user.pk, url, has_hint=bool(story_hint))
46+
4347
AnalyzeWebFeedPage.apply_async(
4448
kwargs={
4549
"user_id": request.user.pk,
@@ -157,6 +161,8 @@ def subscribe(request):
157161
except Exception as e:
158162
logging.user(request.user, f"~BB~FWWeb Feed: ~FR~SBFavicon import failed~SN~FW - {e}")
159163

164+
RTrendingWebFeed.record_subscription(request.user.pk, url, variant_index)
165+
160166
logging.user(request.user, f"~BB~FWWeb Feed: Subscribed to ~SB{url}~SN (feed {feed.pk})")
161167

162168
# Trigger background fetch for archive subscribers
@@ -203,6 +209,8 @@ def reanalyze(request):
203209

204210
url = feed.feed_address[len("webfeed:") :]
205211

212+
RTrendingWebFeed.record_reanalysis(request.user.pk)
213+
206214
logging.user(request.user, f"~BB~FWWeb Feed: Re-analyzing ~SB{url}~SN (feed {feed_id})")
207215

208216
AnalyzeWebFeedPage.apply_async(

0 commit comments

Comments
 (0)