Skip to content

Commit 666c01c

Browse files
committed
remove the unnecessary tool statistics information
1 parent 52e1e09 commit 666c01c

File tree

1 file changed

+2 additions, -88 deletions (lines changed)

1 file changed

+2 additions, -88 deletions (lines changed)

libs/miroflow-tools/src/miroflow_tools/dev_mcp_servers/jina_scrape_llm_summary.py

Lines changed: 2 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,6 @@ async def scrape_and_extract_info(
4343
- url (str): The original URL
4444
- extracted_info (str): The extracted information
4545
- error (str): Error message if the operation failed
46-
- scrape_stats (Dict): Statistics about the scraped content
47-
- model_used (str): The model used for summarization
48-
- tokens_used (int): Number of tokens used (if available)
4946
"""
5047
if _is_huggingface_dataset_or_space_url(url):
5148
return json.dumps(
@@ -54,8 +51,6 @@ async def scrape_and_extract_info(
5451
"url": url,
5552
"extracted_info": "",
5653
"error": "You are trying to scrape a Hugging Face dataset for answers, please do not use the scrape tool for this purpose.",
57-
"scrape_stats": {},
58-
"tokens_used": 0,
5954
},
6055
ensure_ascii=False,
6156
)
@@ -80,8 +75,6 @@ async def scrape_and_extract_info(
8075
"url": url,
8176
"extracted_info": "",
8277
"error": f"Scraping failed (both Jina and Python): {scrape_result['error']}",
83-
"scrape_stats": {},
84-
"tokens_used": 0,
8578
},
8679
ensure_ascii=False,
8780
)
@@ -106,14 +99,6 @@ async def scrape_and_extract_info(
10699
"url": url,
107100
"extracted_info": extracted_result["extracted_info"],
108101
"error": extracted_result["error"],
109-
"scrape_stats": {
110-
"line_count": scrape_result["line_count"],
111-
"char_count": scrape_result["char_count"],
112-
"last_char_line": scrape_result["last_char_line"],
113-
"all_content_displayed": scrape_result["all_content_displayed"],
114-
},
115-
"model_used": extracted_result["model_used"],
116-
"tokens_used": extracted_result["tokens_used"],
117102
},
118103
ensure_ascii=False,
119104
)
@@ -146,12 +131,8 @@ async def scrape_url_with_jina(
146131
Dict[str, Any]: A dictionary containing:
147132
- success (bool): Whether the operation was successful
148133
- filename (str): Absolute path to the temporary file containing the scraped content
149-
- content (str): The scraped content of the first 40k characters
134+
- content (str): The scraped content (truncated to max_chars if necessary)
150135
- error (str): Error message if the operation failed
151-
- line_count (int): Number of lines in the scraped content
152-
- char_count (int): Number of characters in the scraped content
153-
- last_char_line (int): Line number where the last displayed character is located
154-
- all_content_displayed (bool): Signal indicating if all content was displayed (True if content <= 40k chars)
155136
"""
156137

157138
# Validate input
@@ -161,10 +142,6 @@ async def scrape_url_with_jina(
161142
"filename": "",
162143
"content": "",
163144
"error": "URL cannot be empty",
164-
"line_count": 0,
165-
"char_count": 0,
166-
"last_char_line": 0,
167-
"all_content_displayed": False,
168145
}
169146

170147
# Get API key from environment
@@ -174,10 +151,6 @@ async def scrape_url_with_jina(
174151
"filename": "",
175152
"content": "",
176153
"error": "JINA_API_KEY environment variable is not set",
177-
"line_count": 0,
178-
"char_count": 0,
179-
"last_char_line": 0,
180-
"all_content_displayed": False,
181154
}
182155

183156
# Avoid duplicate Jina URL prefix
@@ -301,10 +274,6 @@ async def scrape_url_with_jina(
301274
"filename": "",
302275
"content": "",
303276
"error": error_msg,
304-
"line_count": 0,
305-
"char_count": 0,
306-
"last_char_line": 0,
307-
"all_content_displayed": False,
308277
}
309278

310279
# Get the scraped content
@@ -316,10 +285,6 @@ async def scrape_url_with_jina(
316285
"filename": "",
317286
"content": "",
318287
"error": "No content returned from Jina.ai API",
319-
"line_count": 0,
320-
"char_count": 0,
321-
"last_char_line": 0,
322-
"all_content_displayed": False,
323288
}
324289

325290
# handle insufficient balance error
@@ -336,35 +301,15 @@ async def scrape_url_with_jina(
336301
"filename": "",
337302
"content": "",
338303
"error": "Insufficient balance",
339-
"line_count": 0,
340-
"char_count": 0,
341-
"last_char_line": 0,
342-
"all_content_displayed": False,
343304
}
344305

345-
# Get content statistics
346-
total_char_count = len(content)
347-
total_line_count = content.count("\n") + 1 if content else 0
348-
349306
# Extract first max_chars characters
350307
displayed_content = content[:max_chars]
351-
all_content_displayed = total_char_count <= max_chars
352-
353-
# Calculate the line number of the last character displayed
354-
if displayed_content:
355-
# Count newlines up to the last displayed character
356-
last_char_line = displayed_content.count("\n") + 1
357-
else:
358-
last_char_line = 0
359308

360309
return {
361310
"success": True,
362311
"content": displayed_content,
363312
"error": "",
364-
"line_count": total_line_count,
365-
"char_count": total_char_count,
366-
"last_char_line": last_char_line,
367-
"all_content_displayed": all_content_displayed,
368313
}
369314

370315

@@ -382,23 +327,15 @@ async def scrape_url_with_python(
382327
Returns:
383328
Dict[str, Any]: A dictionary containing:
384329
- success (bool): Whether the operation was successful
385-
- content (str): The scraped content
330+
- content (str): The scraped content (truncated to max_chars if necessary)
386331
- error (str): Error message if the operation failed
387-
- line_count (int): Number of lines in the scraped content
388-
- char_count (int): Number of characters in the scraped content
389-
- last_char_line (int): Line number where the last displayed character is located
390-
- all_content_displayed (bool): Signal indicating if all content was displayed
391332
"""
392333
# Validate input
393334
if not url or not url.strip():
394335
return {
395336
"success": False,
396337
"content": "",
397338
"error": "URL cannot be empty",
398-
"line_count": 0,
399-
"char_count": 0,
400-
"last_char_line": 0,
401-
"all_content_displayed": False,
402339
}
403340

404341
try:
@@ -511,10 +448,6 @@ async def scrape_url_with_python(
511448
"success": False,
512449
"content": "",
513450
"error": error_msg,
514-
"line_count": 0,
515-
"char_count": 0,
516-
"last_char_line": 0,
517-
"all_content_displayed": False,
518451
}
519452

520453
# Get the scraped content
@@ -525,34 +458,15 @@ async def scrape_url_with_python(
525458
"success": False,
526459
"content": "",
527460
"error": "No content returned from URL",
528-
"line_count": 0,
529-
"char_count": 0,
530-
"last_char_line": 0,
531-
"all_content_displayed": False,
532461
}
533462

534-
# Get content statistics
535-
total_char_count = len(content)
536-
total_line_count = content.count("\n") + 1 if content else 0
537-
538463
# Extract first max_chars characters
539464
displayed_content = content[:max_chars]
540-
all_content_displayed = total_char_count <= max_chars
541-
542-
# Calculate the line number of the last character displayed
543-
if displayed_content:
544-
last_char_line = displayed_content.count("\n") + 1
545-
else:
546-
last_char_line = 0
547465

548466
return {
549467
"success": True,
550468
"content": displayed_content,
551469
"error": "",
552-
"line_count": total_line_count,
553-
"char_count": total_char_count,
554-
"last_char_line": last_char_line,
555-
"all_content_displayed": all_content_displayed,
556470
}
557471

558472

0 commit comments

Comments
 (0)