@@ -15,6 +15,8 @@
 
 import requests
 from pydantic import BaseModel, Field
+from newspaper import Article, ArticleException
+from requests.exceptions import RequestException
 
 
 class Claim(BaseModel):
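The two new imports bring in newspaper3k's article extractor and the root of requests' exception hierarchy. Worth noting: because Timeout, ConnectionError, and the HTTPError raised by raise_for_status() all subclass RequestException, the single except clause added to main() below covers every network-level failure. A quick sketch:

# RequestException is the base class of requests' error hierarchy,
# so one except clause catches timeouts, connection failures, and HTTP errors.
from requests.exceptions import RequestException, Timeout, ConnectionError, HTTPError

assert issubclass(Timeout, RequestException)
assert issubclass(ConnectionError, RequestException)
assert issubclass(HTTPError, RequestException)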
@@ -92,6 +94,8 @@ def _load_system_prompt(self, prompt_file: str) -> str:
         try:
             with open(prompt_file, 'r', encoding='utf-8') as f:
                 return f.read().strip()
+        except FileNotFoundError:
+            print(f"Warning: Prompt file not found at {prompt_file}", file=sys.stderr)
         except Exception as e:
             print(f"Warning: Could not load system prompt from {prompt_file}: {e}", file=sys.stderr)
         print("Using default system prompt.", file=sys.stderr)
@@ -113,6 +117,8 @@ def check_claim(self, text: str, model: str = DEFAULT_MODEL, use_structured_outp
         Returns:
             The parsed response containing fact check results.
         """
+        if not text or not text.strip():
+            return {"error": "Input text is empty. Cannot perform fact check."}
         user_prompt = f"Fact check the following text and identify any false or misleading claims:\n\n{text}"
 
         headers = {
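The guard makes check_claim() short-circuit on blank input, returning an error dict before any headers or API request are built. A hedged usage sketch (the FactChecker constructor arguments are assumed):

# Whitespace-only input is rejected before any network call happens.
checker = FactChecker()  # constructor signature assumed
result = checker.check_claim("   ")
print(result)  # {'error': 'Input text is empty. Cannot perform fact check.'}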
@@ -290,9 +296,10 @@ def main():
     input_group = parser.add_mutually_exclusive_group(required=True)
     input_group.add_argument("-t", "--text", type=str, help="Text to fact check")
     input_group.add_argument("-f", "--file", type=str, help="Path to file containing text to fact check")
+    input_group.add_argument("-u", "--url", type=str, help="URL of the article to fact check")
 
     parser.add_argument(
-        "-m" ,
+        "-m",
         "--model",
         type=str,
         default=FactChecker.DEFAULT_MODEL,
@@ -334,9 +341,35 @@ def main():
         except Exception as e:
             print(f"Error reading file: {e}", file=sys.stderr)
             return 1
-    else:
+    elif args.url:
+        try:
+            print(f"Fetching content from URL: {args.url}", file=sys.stderr)
+            response = requests.get(args.url, timeout=15)  # Add a timeout
+            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+
+            article = Article(url=args.url)
+            article.download(input_html=response.text)
+            article.parse()
+            text = article.text
+            if not text:
+                print(f"Error: Could not extract text from URL: {args.url}", file=sys.stderr)
+                return 1
+        except RequestException as e:
+            print(f"Error fetching URL: {e}", file=sys.stderr)
+            return 1
+        except ArticleException as e:
+            print(f"Error parsing article content: {e}", file=sys.stderr)
+            return 1
+        except Exception as e:  # Catch other potential errors during fetch/parse
+            print(f"An unexpected error occurred while processing the URL: {e}", file=sys.stderr)
+            return 1
+    else:  # This corresponds to args.text
         text = args.text
-
+
+    if not text:  # Ensure text is not empty before proceeding
+        print("Error: No text found to fact check.", file=sys.stderr)
+        return 1
+
     print("Fact checking in progress...", file=sys.stderr)
     results = fact_checker.check_claim(
         text,
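Since -u joins the mutually exclusive group declared with required=True, argparse guarantees exactly one of -t, -f, or -u is present, which is what keeps the trailing else branch safe to treat as the text case. A self-contained sketch of that behavior:

# argparse enforces the exclusivity: supplying none, or more than one,
# of the grouped flags exits with a usage error.
import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("-t", "--text")
group.add_argument("-f", "--file")
group.add_argument("-u", "--url")

args = parser.parse_args(["-u", "https://example.com/some-article"])
print(args.url)   # https://example.com/some-article
print(args.text)  # None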