|
| 1 | +import openai |
| 2 | +import os |
| 3 | +import sys |
| 4 | +import logging |
| 5 | +import json |
| 6 | + |
class Logger:
    """Configure root logging for the spell-check script."""

    @staticmethod
    def configure(log_level=None):
        """Initialise the root logger via ``logging.basicConfig``.

        Args:
            log_level: Level name such as ``"DEBUG"`` or ``"info"``
                (case-insensitive). When omitted or empty, the ``LOG_LEVEL``
                environment variable is used, defaulting to ``"ERROR"``.

        Unrecognised level names fall back to ``logging.ERROR``.
        """
        # Upper-case whichever source supplied the name; applying .upper()
        # only to the environment value would make configure("debug") fall
        # through to the ERROR default. str() keeps non-string arguments
        # from raising and lets them take the same fallback.
        level_name = str(log_level or os.getenv("LOG_LEVEL", "ERROR")).strip().upper()
        levels = {
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR,
            "CRITICAL": logging.CRITICAL,
        }
        logging.basicConfig(
            level=levels.get(level_name, logging.ERROR),
            format='%(asctime)s - %(levelname)s - %(message)s',
        )
| 20 | + |
class FileHandler:
    """File helpers for reading inputs and storing spell-check results.

    All files are opened as UTF-8 so that non-ASCII text (for example the
    emoji the prompt asks the model to emit) does not depend on the
    platform's default encoding.
    """

    @staticmethod
    def read_file(file_path):
        """Return the lines of ``file_path`` (line endings kept).

        Returns:
            A list of lines, or ``None`` when the file is missing or cannot
            be read; the failure is logged.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                return file.readlines()
        except FileNotFoundError:
            logging.error(f"File not found: {file_path}")
            return None
        except Exception as e:
            # Best-effort: any other read/decode failure skips this file.
            logging.error(f"Error reading file {file_path}: {e}")
            return None

    @staticmethod
    def save_result(file_name, result, result_path="spell_check_result_with_lines.json"):
        """Append one result section for ``file_name`` to the results file.

        Args:
            file_name: Name written into the ``Results for ...:`` header.
            result: Text stored under that header.
            result_path: Destination file; defaults to the path the script
                has always used. The file is opened in append mode, so
                existing content is kept.

        Failures are logged and otherwise ignored.
        """
        try:
            with open(result_path, "a", encoding="utf-8") as result_file:
                result_file.write(f"Results for {file_name}:\n{result}\n\n")
        except Exception as e:
            logging.error(f"Error saving result for {file_name}: {e}")

    @staticmethod
    def load_result(file_path):
        """Return the full text of ``file_path``, or ``None`` on any error."""
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        except Exception as e:
            logging.error(f"Error loading result file: {e}")
            return None
| 51 | + |
class SpellChecker:
    """Ask an OpenAI chat model to review numbered markdown lines."""

    def __init__(self):
        """Read the API key from the environment and set it on ``openai``."""
        self.api_key = self.get_api_key()
        openai.api_key = self.api_key

    @staticmethod
    def get_api_key():
        """Return ``OPENAI_API_KEY`` from the environment.

        Logs an error and exits the process with status 1 when the variable
        is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            logging.error("OPENAI_API_KEY environment variable not set.")
            sys.exit(1)
        return api_key

    @staticmethod
    def _build_user_prompt(numbered_content):
        """Build the user message: instructions followed by the numbered lines."""
        # One entry per line. Joining with "" would fuse entries that carry
        # no trailing newline into a single line, losing the line structure
        # the model is asked to report on. Existing line endings are trimmed
        # first so entries that do carry one are not double-spaced.
        joined_lines = "\n".join(line.rstrip("\r\n") for line in numbered_content)
        return (
            "You are a helpful assistant that checks and corrects only spelling and grammar issues in markdown files, "
            "without altering any other content such as indentation, line numbers, or formatting.\n"
            "For each line provided, return a JSON object with the following fields only if the category is not 'none':\n"
            "- original_text: contains the original line content\n"
            "- suggested_text: contains the corrected line with correct grammar and spelling\n"
            "- line_number: the exact line number of the original md file\n"
            "- category: either 'spelling issue', 'grammar issue', or 'both'\n\n"
            "Only include entries where the category is 'spelling issue', 'grammar issue', or 'both'.\n"
            "If a line has no issues, do not return it.\n"
            # Applies to the whole input; phrasing it per line contradicted
            # the instruction directly above.
            "If no line has any issues, return a message in json saying everything looks good to me 🎉.\n\n"
            "Here are the lines:\n"
            f"{joined_lines}"
        )

    def check_spelling_with_line_numbers(self, numbered_content):
        """Request a spelling/grammar review for ``numbered_content``.

        Args:
            numbered_content: Iterable of strings, one per source line,
                already prefixed with their line numbers.

        Returns:
            The text content of the first choice in the response, or
            ``None`` when the request raises; the error is logged.
        """
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant checking spelling and grammar."},
                    {"role": "user", "content": self._build_user_prompt(numbered_content)},
                ],
                max_tokens=16000,
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            logging.error(f"Error during OpenAI API request: {e}")
            return None
| 92 | + |
class SpellCheckProcessor:
    """Run the spell checker over a list of files and store each result."""

    def __init__(self, file_paths):
        """Remember the files to process and create the ``SpellChecker``."""
        self.file_paths = file_paths
        self.spell_checker = SpellChecker()

    def inject_line_numbers(self, lines):
        """Prefix each line with its 1-based number as ``"N: text"``.

        Trailing whitespace (including the line ending) is removed, but
        leading indentation is kept: it is meaningful in markdown and the
        prompt asks the model not to alter it.
        """
        return [
            f"{number}: {line.rstrip()}"
            for number, line in enumerate(lines, start=1)
        ]

    def process_files(self):
        """Check every file in ``self.file_paths`` and save each result.

        Unreadable files, empty files and failed API calls are logged and
        skipped; processing continues with the next file.
        """
        for file_path in self.file_paths:
            file_lines = FileHandler.read_file(file_path)
            if file_lines is None:
                logging.error(f"Skipping file {file_path} due to read error.")
                continue
            if not file_lines:
                # An empty file was read successfully; there is simply
                # nothing to check, so do not report it as a read error.
                logging.info(f"Skipping empty file: {file_path}")
                continue

            logging.info(f"Processing file: {file_path}")
            numbered_content = self.inject_line_numbers(file_lines)
            result = self.spell_checker.check_spelling_with_line_numbers(numbered_content)
            if result:
                FileHandler.save_result(file_path, result)
            else:
                logging.error(f"Failed to get spell check result for {file_path}")
| 114 | + |
class PRChecker:
    """Decide from the stored results whether the PR should fail."""

    # Categories that fail the PR; grammar-only findings do not.
    _FAILING_CATEGORIES = ("spelling issue", "both")

    @staticmethod
    def _split_result_blocks(content):
        """Split the results text into one block per ``Results for`` header."""
        import re

        # Anchor on line starts so the phrase "Results for" appearing inside
        # a JSON string does not cut a block in half.
        return re.split(r"^Results for ", content, flags=re.MULTILINE)[1:]

    @staticmethod
    def _extract_json_payload(block):
        """Return the JSON text of one result block, or ``""`` if none."""
        opening_fence = "```json"
        if opening_fence in block:
            fenced = block.split(opening_fence, 1)[1]
            # Cut at the LAST closing fence: text after it is commentary,
            # while backticks before it may belong to quoted markdown.
            head, closing_fence, _ = fenced.rpartition("```")
            return (head if closing_fence else fenced).strip()

        # No fence: accept a bare JSON document after the header line.
        _, _, body = block.partition("\n")
        body = body.strip()
        return body if body.startswith(("[", "{")) else ""

    @staticmethod
    def _has_spelling_issue(parsed):
        """Return True when ``parsed`` is a list with a failing category."""
        if not isinstance(parsed, list):
            # A dict (e.g. the "message" object for a clean file) or any
            # other JSON value carries no findings.
            return False
        for entry in parsed:
            if not isinstance(entry, dict):
                logging.warning(f"Ignoring unexpected result entry: {entry!r}")
                continue
            category = entry.get("category", "")
            if category in PRChecker._FAILING_CATEGORIES:
                logging.info(f"Found {category} in line {entry.get('line_number')}.")
                return True
        return False

    @staticmethod
    def should_fail_pr(result_file_path):
        """Return True when the results file reports a spelling issue.

        Args:
            result_file_path: Path of the file holding ``Results for ...``
                sections, each containing the model's JSON answer.

        Returns:
            ``True`` if any section lists an entry whose category is
            ``"spelling issue"`` or ``"both"``, or if the file cannot be
            checked at all (fail closed); ``False`` otherwise. Sections
            whose JSON cannot be decoded are logged and skipped.
        """
        try:
            # errors="replace" keeps decoding from failing on stray bytes;
            # JSON structure is ASCII, so categories remain readable.
            with open(result_file_path, "r", encoding="utf-8", errors="replace") as result_file:
                content = result_file.read()

            fail_pr = False
            for block in PRChecker._split_result_blocks(content):
                json_block = PRChecker._extract_json_payload(block)
                if not json_block:
                    continue

                try:
                    result_json = json.loads(json_block)
                except json.JSONDecodeError as e:
                    logging.error(f"JSON decode error: {e} in block: {json_block}")
                    continue

                # Keep scanning after a hit so every section gets logged.
                if PRChecker._has_spelling_issue(result_json):
                    fail_pr = True
            return fail_pr
        except Exception as e:
            # Top-level boundary: if the results cannot be checked, fail.
            logging.error(f"Error checking PR result file: {e}")
            return True
| 151 | + |
def main():
    """Command-line entry point.

    Configures logging, spell-checks every file named on the command line,
    then exits with status 1 when no file was given or when the stored
    results report a spelling issue, and with status 0 otherwise.
    """
    Logger.configure()

    targets = sys.argv[1:]
    if not targets:
        logging.error("Please provide at least one file to process.")
        sys.exit(1)

    SpellCheckProcessor(targets).process_files()

    # The same file FileHandler.save_result appends to.
    if PRChecker.should_fail_pr("spell_check_result_with_lines.json"):
        logging.error("Spelling issues found. Failing the PR.")
        sys.exit(1)

    logging.info("No spelling issues found, PR can pass.")
    sys.exit(0)


if __name__ == "__main__":
    main()
0 commit comments