Added multiple LLM provider support (#4)

rsb-23 · web-flow · commit 6f855ce29b16 · 2025-03-19T21:10:07.000+05:30
* added pylint
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -18,4 +18,5 @@ jobs:
           files: |
             tests/file.txt
             tests/file.json
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          provider: groq
+          api_key: ${{ secrets.GROQ_API_KEY }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
 repos:
   - repo: https://github.com/PyCQA/isort
-    rev: 5.13.2
+    rev: 6.0.1
     hooks:
       - id: isort
 
   - repo: https://github.com/psf/black
-    rev: 24.10.0
+    rev: 25.1.0
     hooks:
       - id: black
 
@@ -15,10 +15,22 @@ repos:
       - id: check-yaml
       - id: end-of-file-fixer
       - id: no-commit-to-branch
-        args: [-b, main, -b, master]
+        args: [ -b, main, -b, master ]
 
   - repo: https://github.com/PyCQA/flake8
-    rev: 7.1.1
+    rev: 7.1.2
     hooks:
       - id: flake8
-        additional_dependencies: [flake8-pyproject]
+        additional_dependencies: [ flake8-pyproject ]
+  - repo: local
+    hooks:
+      - id: pylint
+        name: pylint
+        entry: pylint
+        language: system
+        types: [ python ]
+        args:
+          [
+            "-rn", # Only display messages
+            "-sn", # Don't display the score
+          ]
diff --git a/README.md b/README.md
@@ -3,7 +3,9 @@
 This Github Action uses AI to find typos and grammatical errors in specified data files.
 
 ## Usage
+
 Refer [test.yml](./.github/workflows/test.yml)
+
 ```yaml
   - name: Lint Text in Data Files
     uses: actions/text-linter@v1
@@ -12,5 +14,12 @@ Refer [test.yml](./.github/workflows/test.yml)
       files: |
         tests/file.txt
         tests/file.json
-      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+      provider: groq
+      api_key: ${{ secrets.GROQ_API_KEY }}
+      model:  # optional
 ```
+
+## Supported Models & Providers
+
+All text models supported by [LiteLLM](https://docs.litellm.ai/docs/providers) can be used.  
+The `provider` name must therefore match the name given in the LiteLLM docs.
diff --git a/action.yml b/action.yml
@@ -14,9 +14,16 @@ inputs:
     description: 'Comma-separated list of files to analyze'
     required: true
     type: list
-  GROQ_API_KEY:
-    description: 'Groq API Key'
+  provider:
+    description: 'Provider of the language model (e.g., openai, anthropic, groq)'
     required: true
+    default: groq
+  api_key:
+    description: 'API Key for the specified provider'
+    required: true
+  model:
+    description: 'Language model by the specified provider'
+    required: false
 
 
 runs:
@@ -44,7 +51,9 @@ runs:
       env:
         PR_BASE: ${{ github.base_ref }}
         INPUT_FILES: ${{ inputs.files }}
-        GROQ_API_KEY: ${{ inputs.GROQ_API_KEY }}
+        PROVIDER: ${{ inputs.provider }}
+        API_KEY: ${{ inputs.api_key }}
+        LLM_MODEL: ${{ inputs.model }}
         token: ${{ inputs.token }}
         PR_NO: ${{ github.event.pull_request.number }}
       run: |
diff --git a/local_test.py b/local_test.py
@@ -1,7 +1,9 @@
-from dotenv import load_dotenv
+from dotenv import load_dotenv, set_key
 
 load_dotenv()
 
+set_key(".env", "ENVIRONMENT", "local")
+
 if __name__ == "__main__":
     from src.text_linter import main
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -14,5 +14,12 @@ profile = "black"
 line_length = 120
 multi_line_output = 3
 
+[tool.pylint.master]
+jobs = 2
+ignore-paths = "(?!(src|tests))/*"
+
 [tool.pylint.format]
 max-line-length = 130
+
+[tool.pylint.messages_control]
+disable = ["I", "import-error", "missing-docstring", "duplicate-code"]
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
-groq==0.13.0
+litellm~=1.63.11
 requests~=2.32.3
diff --git a/src/config.py b/src/config.py
@@ -1,14 +1,24 @@
 import os
+from dataclasses import dataclass
 
 
-def get_env(x):
-    return os.environ[x]
+def get_env(x, optional=False):
+    return os.environ.get(x) if optional else os.environ[x]
 
 
-GROQ_API_KEY = get_env("GROQ_API_KEY")
-PAT_TOKEN = get_env("token")
+ENVIRONMENT = get_env("ENVIRONMENT", optional=True)
 
-REPO = get_env("GITHUB_REPOSITORY")
-PR_BASE = get_env("PR_BASE")
-PR_NO = get_env("PR_NO")
-INPUT_FILES = [*map(str.strip, get_env("INPUT_FILES").splitlines())]
+
+@dataclass
+class GitEnv:
+    PAT_TOKEN = get_env("token")
+    REPO = get_env("GITHUB_REPOSITORY")
+    PR_BASE = get_env("PR_BASE")
+    PR_NO = get_env("PR_NO")
+    INPUT_FILES = [*map(str.strip, get_env("INPUT_FILES").splitlines())]
+
+
+PROVIDER = get_env("PROVIDER").lower()
+LLM_API_KEY = get_env("API_KEY")
+LLM_MODELS = {"openai": "gpt-3.5-turbo", "anthropic": "claude-3-haiku-20240307", "groq": "llama3-70b-8192"}
+LLM_MODEL = get_env("LLM_MODEL", optional=True) or LLM_MODELS[PROVIDER]
diff --git a/src/llm_service.py b/src/llm_service.py
@@ -1,39 +1,39 @@
 import json
-from functools import cache
 
-from groq import NOT_GIVEN, BadRequestError, Groq
+from litellm import BadRequestError, completion, validate_environment
 
-from src.config import GROQ_API_KEY
+from src.config import LLM_API_KEY, LLM_MODEL, PROVIDER
 
-LLM_MODEL = ["llama-3.1-8b-instant", "llama3-70b-8192"][1]
 
+def validate_model(provider: str = PROVIDER, model: str = LLM_MODEL) -> None:
+    model_str = f"{provider}/{model}"
+    assert not validate_environment(model_str)["keys_in_environment"], f"Invalid value : {model}"
 
-@cache
-def get_groq_client():
-    return Groq(api_key=GROQ_API_KEY)
 
-
-def ask_groq(query_text: str, system_content: str, json_schema: dict = None) -> dict:
-    client = get_groq_client()
-    response_format = NOT_GIVEN
+def ask_llm(query_text: str, system_content: str, json_schema: dict = None, provider=PROVIDER) -> dict:
+    response_format = None
     if json_schema:
         system_content = f"{system_content}\n Use this json schema to reply {json.dumps(json_schema)}"
         response_format = {"type": "json_object"}
     try:
-        chat_completion = client.chat.completions.create(
+        # Send a message to the model
+        print("calling llm...")
+        response = completion(
+            model=f"{provider}/{LLM_MODEL}",
+            api_key=LLM_API_KEY,
             messages=[{"role": "system", "content": system_content}, {"role": "user", "content": query_text}],
-            model=LLM_MODEL,
             response_format=response_format,
         )
-        json_str = chat_completion.choices[0].message.content
-        json_str = json_str.strip()
+        json_str = response["choices"][0]["message"]["content"]
     except BadRequestError as e:
-        failed_json = e.body["error"]["failed_generation"]  # noqa
-        json_str = failed_json.replace('"""', '"').replace("\n", "\\n")
+        json_str = e.message  # reusing variable for showing error
+
+    if json_str.startswith("litellm"):
+        raise RuntimeError(json_str)
+
     if not json_schema:
         return json_str
     try:
-        # json_str = re.sub("\n +", "", json_str)
         return json.loads(json_str)
     except json.JSONDecodeError as e:
         print(e.doc.encode(), e.pos)
@@ -47,7 +47,7 @@ def find_typos(query_text):
         " and list mistakes (if any) along with suggestion, not fixes."
         "Keep it short, no explanation. Do not modify urls, usernames and hashtags."
     )
-    ans = ask_groq(query_text, system_content, json_schema={"suggestions": ["str"]})
+    ans = ask_llm(query_text, system_content, json_schema={"suggestions": ["str"]})
     return ans["suggestions"]
 
 
@@ -57,12 +57,12 @@ def fix_typos(query_text):
         "without any additional info like headings, footers, etc."
         "Do not modify urls, usernames and hashtags. `~~~` is a phrase seperator, keep it as it is."
     )
-    return ask_groq(query_text, system_content)
+    return ask_llm(query_text, system_content)
 
 
 if __name__ == "__main__":
     from dotenv import load_dotenv  # noqa
 
     load_dotenv()
-    answer = ask_groq("Debuging is hard.", system_content="")
-    print(answer)
+    answer = ask_llm("Debuging is hard.", system_content="")
+    print(f"Response from model: {answer}")
diff --git a/src/text_linter.py b/src/text_linter.py
@@ -2,11 +2,11 @@
 
 import requests
 
-from src.config import INPUT_FILES, PAT_TOKEN, PR_BASE, PR_NO, REPO
-from src.groq_ai import find_typos
+from src.config import ENVIRONMENT, GitEnv
+from src.llm_service import find_typos, validate_model
 
 
-def process_diff(file_path, base_branch=PR_BASE):
+def process_diff(file_path, base_branch=GitEnv.PR_BASE):
     try:
         # Get diff from PR
         diff_command = f"git diff -U0 origin/{base_branch}... -- {file_path}"
@@ -21,19 +21,24 @@ def process_diff(file_path, base_branch=PR_BASE):
 
 
 def post_comment(comment):
-    url = f"https://api.github.com/repos/{REPO}/issues/{PR_NO}/comments"
+    print(comment)
+    if ENVIRONMENT == "local":
+        return
+
+    url = f"https://api.github.com/repos/{GitEnv.REPO}/issues/{GitEnv.PR_NO}/comments"
     headers = {
         "Accept": "application/vnd.github+json",
-        "Authorization": f"Bearer {PAT_TOKEN}",
+        "Authorization": f"Bearer {GitEnv.PAT_TOKEN}",
         "X-GitHub-Api-Version": "2022-11-28",
     }
     resp = requests.post(url, headers=headers, json={"body": comment}, timeout=300)
     resp.raise_for_status()
 
 
 def main():
+    validate_model()
     # Process diff(s) for each file
-    results = {file_path: process_diff(file_path) for file_path in INPUT_FILES if file_path}
+    results = {file_path: process_diff(file_path) for file_path in GitEnv.INPUT_FILES if file_path}
 
     # Create markdown comment for fixes
     flag = False
@@ -49,7 +54,6 @@ def main():
     else:
         comment = "### No typos found"
 
-    print(comment)
     post_comment(comment)
 
 
diff --git a/tests/file.json b/tests/file.json
@@ -1 +1 @@
-{"fruit1": "pineaple", "friut2": "graep"}
+{"fruit1": "pineaple", "friut2": "graepi"}
diff --git a/tests/file.txt b/tests/file.txt
@@ -1,2 +1,2 @@
 reciever
-bluetooth
+bluetooh

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`		`-groq==0.13.0`
	`1`	`+litellm~=1.63.11`
`2`	`2`	`requests~=2.32.3`