@@ -15,6 +15,8 @@
 
 import requests
 from pydantic import BaseModel, Field
+from newspaper import Article, ArticleException
+from requests.exceptions import RequestException
 
 
 class Claim(BaseModel):
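The two new imports bring in newspaper3k's article extractor and the root of requests' exception hierarchy. Worth noting: because Timeout, ConnectionError, and the HTTPError raised by raise_for_status() all subclass RequestException, the single except clause added to main() below covers every network-level failure. A quick sketch:

# RequestException is the base class of requests' error hierarchy,
# so one except clause catches timeouts, connection failures, and HTTP errors.
from requests.exceptions import RequestException, Timeout, ConnectionError, HTTPError

assert issubclass(Timeout, RequestException)
assert issubclass(ConnectionError, RequestException)
assert issubclass(HTTPError, RequestException)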
@@ -92,6 +94,8 @@ def _load_system_prompt(self, prompt_file: str) -> str:
         try:
             with open(prompt_file, 'r', encoding='utf-8') as f:
                 return f.read().strip()
+        except FileNotFoundError:
+            print(f"Warning: Prompt file not found at {prompt_file}", file=sys.stderr)
         except Exception as e:
             print(f"Warning: Could not load system prompt from {prompt_file}: {e}", file=sys.stderr)
         print("Using default system prompt.", file=sys.stderr)
@@ -113,6 +117,8 @@ def check_claim(self, text: str, model: str = DEFAULT_MODEL, use_structured_outp
         Returns:
             The parsed response containing fact check results.
         """
+        if not text or not text.strip():
+            return {"error": "Input text is empty. Cannot perform fact check."}
         user_prompt = f"Fact check the following text and identify any false or misleading claims:\n\n{text}"
 
         headers = {
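The guard makes check_claim() short-circuit on blank input, returning an error dict before any headers or API request are built. A hedged usage sketch (the FactChecker constructor arguments are assumed):

# Whitespace-only input is rejected before any network call happens.
checker = FactChecker()  # constructor signature assumed
result = checker.check_claim("   ")
print(result)  # {'error': 'Input text is empty. Cannot perform fact check.'}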
@@ -290,9 +296,10 @@ def main():
     input_group = parser.add_mutually_exclusive_group(required=True)
     input_group.add_argument("-t", "--text", type=str, help="Text to fact check")
     input_group.add_argument("-f", "--file", type=str, help="Path to file containing text to fact check")
+    input_group.add_argument("-u", "--url", type=str, help="URL of the article to fact check")
 
     parser.add_argument(
-        "-m" ,
+        "-m",
         "--model",
         type=str,
         default=FactChecker.DEFAULT_MODEL,
@@ -334,9 +341,35 @@ def main():
         except Exception as e:
             print(f"Error reading file: {e}", file=sys.stderr)
             return 1
-    else:
+    elif args.url:
+        try:
+            print(f"Fetching content from URL: {args.url}", file=sys.stderr)
+            response = requests.get(args.url, timeout=15)  # Add a timeout
+            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+
+            article = Article(url=args.url)
+            article.download(input_html=response.text)
+            article.parse()
+            text = article.text
+            if not text:
+                print(f"Error: Could not extract text from URL: {args.url}", file=sys.stderr)
+                return 1
+        except RequestException as e:
+            print(f"Error fetching URL: {e}", file=sys.stderr)
+            return 1
+        except ArticleException as e:
+            print(f"Error parsing article content: {e}", file=sys.stderr)
+            return 1
+        except Exception as e:  # Catch other potential errors during fetch/parse
+            print(f"An unexpected error occurred while processing the URL: {e}", file=sys.stderr)
+            return 1
+    else:  # This corresponds to args.text
         text = args.text
-
+
+    if not text:  # Ensure text is not empty before proceeding
+        print("Error: No text found to fact check.", file=sys.stderr)
+        return 1
+
     print("Fact checking in progress...", file=sys.stderr)
     results = fact_checker.check_claim(
         text,
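Since -u joins the mutually exclusive group declared with required=True, argparse guarantees exactly one of -t, -f, or -u is present, which is what keeps the trailing else branch safe to treat as the text case. A self-contained sketch of that behavior:

# argparse enforces the exclusivity: supplying none, or more than one,
# of the grouped flags exits with a usage error.
import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("-t", "--text")
group.add_argument("-f", "--file")
group.add_argument("-u", "--url")

args = parser.parse_args(["-u", "https://example.com/some-article"])
print(args.url)   # https://example.com/some-article
print(args.text)  # None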