Skip to content

Commit 5bb82c8

Browse files
authored
Merge pull request #40 from datum-cloud/fwidjaja-patch-3
v1.0.2
2 parents c4a93cc + b224460 commit 5bb82c8

File tree

1 file changed

+13
-31
lines changed

1 file changed

+13
-31
lines changed

scripts/monitor_news.py

Lines changed: 13 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -92,39 +92,21 @@ def search_category(self, category: str, config: Dict[str, Any]) -> List[Dict[st
9292
today = datetime.now()
9393
week_ago = today - timedelta(days=7)
9494

95-
# Construct the search prompt
96-
prompt = f"""Search for new cloud services and platforms launched in the past week in the "{category}" category.
95+
# Construct a shorter search prompt to reduce token usage
96+
prompt = f"""Find new cloud services in "{category}" launched in past 7 days.
9797
98-
Category description: {config['description']}
99-
Keywords to focus on: {', '.join(config['keywords'])}
98+
Keywords: {', '.join(config['keywords'][:3])}
10099
101-
For each service you find:
102-
1. Verify it's a real company/service (not just news about existing ones)
103-
2. Get the company name and primary URL
104-
3. Check if it appears to be a cloud/SaaS service
105-
4. Get a brief description of what they offer
100+
Return JSON array of NEW services only:
101+
[{{"company_name": "...", "url": "...", "description": "...", "category": "{category}"}}]
106102
107-
Return results as a JSON array with this format:
108-
[
109-
{{
110-
"company_name": "Acme GPU Cloud",
111-
"url": "https://acmegpu.com",
112-
"description": "Brief description of the service",
113-
"found_via": "TechCrunch article about...",
114-
"category": "{category}"
115-
}}
116-
]
117-
118-
Focus on NEW services (launched or announced in past 7 days).
119-
If you don't find any new services, return an empty array [].
120-
ONLY return the JSON array, no other text.
121-
"""
103+
Return empty array [] if none found. JSON only, no other text."""
122104

123105
try:
124106
# Call Claude with web search enabled
125107
response = self.client.messages.create(
126108
model="claude-sonnet-4-20250514",
127-
max_tokens=4096,
109+
max_tokens=2048, # Reduced from 4096 to save tokens
128110
tools=[{
129111
"type": "web_search_20250305",
130112
"name": "web_search"
@@ -152,13 +134,13 @@ def search_category(self, category: str, config: Dict[str, Any]) -> List[Dict[st
152134
return results
153135

154136
except anthropic.RateLimitError as e:
155-
print(f"⚠️ Rate limit hit for {category}, waiting 10 seconds...")
156-
time.sleep(10)
137+
print(f"⚠️ Rate limit hit for {category}, waiting 30 seconds...")
138+
time.sleep(30) # Increased from 10 to 30 seconds
157139
# Retry once
158140
try:
159141
response = self.client.messages.create(
160142
model="claude-sonnet-4-20250514",
161-
max_tokens=4096,
143+
max_tokens=2048, # Reduced from 4096
162144
tools=[{
163145
"type": "web_search_20250305",
164146
"name": "web_search"
@@ -211,10 +193,10 @@ def run_daily_monitor(self) -> Dict[str, Any]:
211193
all_candidates.extend(candidates)
212194

213195
# Rate limiting: wait between requests to avoid hitting API limits
214-
# 30k tokens/min limit = need ~2 second delay between calls
196+
# Web search uses LOTS of tokens, need longer delays
215197
if i < len(CATEGORIES) - 1: # Don't sleep after last one
216-
print(f"⏱️ Rate limiting: waiting 3 seconds...")
217-
time.sleep(3)
198+
print(f"⏱️ Rate limiting: waiting 20 seconds...")
199+
time.sleep(20) # 20 seconds = ~3 calls/min = safely under 30k tokens/min
218200

219201
# Deduplicate
220202
unique_candidates = self.deduplicate_against_existing(all_candidates)

0 commit comments

Comments (0)