|
| 1 | +from typing import List |
| 2 | +import os |
| 3 | +import re |
| 4 | + |
# Bytes counted as "text": printable ASCII (0x20-0x7E) plus newline,
# carriage return, tab, form feed and backspace.
text_characters = bytes(range(32, 127)) + b"\n\r\t\f\b"
# Identity translation table: passing it to bytes.translate() leaves every
# byte unchanged, so only the "delete" argument has an effect.
_null_trans = bytes.maketrans(b"", b"")
| 7 | + |
| 8 | + |
def istextfile(filename: str, blocksize: int = 512) -> bool:
    """
    Guesses whether a file is a text file by inspecting its first bytes.

    :param filename: Path of the file to inspect.
    :param blocksize: (optional) Number of leading bytes to read. Defaults to 512.

    :return: The result of ``istext`` applied to the bytes that were read.
    """
    # Use a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename, "rb") as handle:
        head = handle.read(blocksize)
    return istext(head)
| 11 | + |
| 12 | + |
def istext(b: bytes) -> bool:
    """
    Heuristically decides whether a byte string looks like text.

    :param b: The bytes to classify.

    :return: True for empty input; False if the input contains a NUL byte or
        if more than 30% of its bytes are outside ``text_characters``;
        True otherwise.
    """
    # Empty input is treated as text.
    if not b:
        return True

    # A single NUL byte is enough to call the data binary.
    if b"\0" in b:
        return False

    # Delete every text byte (the table maps each byte to itself), leaving
    # only the non-text bytes behind.
    non_text = b.translate(_null_trans, text_characters)

    # Text means at most 30% of the bytes are non-text.
    return len(non_text) / len(b) <= 0.30
| 29 | + |
| 30 | + |
def grep_directory(
    directory: str, pattern: re.Pattern, recursive: bool = False, include_binary: bool = False
) -> List[str]:
    """
    Collects the files under a directory whose contents match a regex.

    :param directory: The directory to search in.
    :param pattern: The compiled regex to search with.
    :param recursive: (optional) Whether to descend into subdirectories.
        Defaults to False, in which case only the files directly inside
        ``directory`` are examined.
    :param include_binary: (optional) Whether to also search files that
        ``istextfile`` classifies as binary. Defaults to False.

    :return: The paths (joined onto the walked directory) of the matching files.
    """
    matches = []
    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            path = os.path.join(current_dir, name)
            # The text check only runs when binary files are excluded.
            eligible = include_binary or istextfile(path)
            if eligible and grep_file(path, pattern):
                matches.append(path)
        # os.walk yields the top directory first; stopping after it keeps
        # the search non-recursive.
        if not recursive:
            break
    return matches
| 55 | + |
| 56 | + |
def grep_file(file: str, pattern: re.Pattern) -> bool:
    """
    Searches for a regex in a file, line by line.

    :param file: The path of the file to search in.
    :param pattern: The compiled (str) regex to search with.

    :return: Whether the regex was found in any line of the file.
    """
    # errors="replace" keeps undecodable bytes from raising
    # UnicodeDecodeError, which matters when a caller deliberately passes a
    # binary file (e.g. grep_directory with include_binary=True).
    with open(file, "r", errors="replace") as f:
        # Iterate lazily so the whole file is never held in memory and the
        # scan stops at the first matching line.
        return any(pattern.search(line) for line in f)
0 commit comments