forked from GitHubSecurityLab/seclab-taskflows
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocal_file_viewer.py
More file actions
176 lines (160 loc) · 6.82 KB
/
local_file_viewer.py
File metadata and controls
176 lines (160 loc) · 6.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# SPDX-FileCopyrightText: 2025 GitHub
# SPDX-License-Identifier: MIT
import logging
from fastmcp import FastMCP
from pydantic import Field
import httpx
import json
import os
from pathlib import Path
import aiofiles
import zipfile
import tempfile
from seclab_taskflow_agent.path_utils import mcp_data_dir, log_file_name
# Log everything (DEBUG and up) to a per-tool log file, appending across runs.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    # Bug fix: the import above brings in log_file_name — there is no
    # log_file_dir in scope, so the original call raised NameError at import.
    filename=log_file_name('mcp_local_file_viewer.log'),
    filemode='a'
)

mcp = FastMCP("LocalFileViewer")

# Root directory holding locally cached GitHub repos as <owner>/<repo>.zip.
LOCAL_GH_DIR = mcp_data_dir('seclab-taskflows', 'local_file_viewer', 'LOCAL_GH_DIR')
def is_subdirectory(directory, potential_subdirectory):
    """Return True if *potential_subdirectory* is *directory* or lies below it.

    Purely lexical containment test via ``Path.relative_to`` — no filesystem
    access, no symlink resolution.
    """
    try:
        Path(potential_subdirectory).relative_to(Path(directory))
    except ValueError:
        # relative_to raises when the path is not inside the directory.
        return False
    return True
def sanitize_file_path(file_path, allow_paths):
    """Canonicalize *file_path* and allow it only inside *allow_paths*.

    Resolves symlinks and ``..`` components via ``os.path.realpath``, then
    returns the resolved path as a ``Path`` when it falls under any entry of
    *allow_paths*; returns ``None`` otherwise (path-traversal guard).
    """
    resolved = os.path.realpath(file_path)
    if any(is_subdirectory(root, resolved) for root in allow_paths):
        return Path(resolved)
    return None
def remove_root_dir(path):
    """Drop the first '/'-separated component of *path*.

    GitHub-style zip archives wrap everything in a single top-level folder;
    this strips it, yielding repo-relative names. A single-component path
    yields the empty string.
    """
    _, _, remainder = path.partition('/')
    return remainder
def strip_leading_dash(path):
    """Remove a single leading '/' from *path*.

    NOTE: despite the name, this strips a slash, not a dash (name kept for
    existing callers). Falsy inputs (None, '') pass through unchanged.
    """
    if path and path.startswith('/'):
        return path[1:]
    return path
def search_zipfile(database_path, term, search_dir = None):
    """Search every file inside the zip at *database_path* for *term*.

    Args:
        database_path: path to a zip archive whose entries share a single
            top-level root folder (GitHub zip-download layout — see
            remove_root_dir).
        term: substring to look for on each line.
        search_dir: optional root-relative directory (or file path) that
            restricts the search; a leading '/' is tolerated.

    Returns:
        dict mapping root-relative filename -> list of 1-based line numbers
        whose text contains *term*.
    """
    results = {}
    search_dir = strip_leading_dash(search_dir)
    with zipfile.ZipFile(database_path) as z:
        for entry in z.infolist():
            if entry.is_dir():
                continue
            filename = remove_root_dir(entry.filename)
            if search_dir and not is_subdirectory(search_dir, filename):
                continue
            with z.open(entry, 'r') as f:
                for i, raw in enumerate(f):
                    # Bug fix: the original tested `term in str(raw)` where
                    # raw is bytes — str(b'x') is "b'x'", so escape sequences
                    # (e.g. non-ASCII, quotes) corrupt matching. Decode the
                    # line instead; undecodable bytes are replaced.
                    if term in raw.decode('utf-8', errors='replace'):
                        results.setdefault(filename, []).append(i + 1)
    return results
def _list_files(database_path, root_dir = None):
    """Return root-relative names of every file in the zip at *database_path*.

    When *root_dir* is given (leading '/' tolerated), only files at or below
    that root-relative directory are returned. Directory entries are skipped.
    """
    root_dir = strip_leading_dash(root_dir)
    with zipfile.ZipFile(database_path) as z:
        names = (remove_root_dir(entry.filename)
                 for entry in z.infolist() if not entry.is_dir())
        if root_dir:
            return [name for name in names if is_subdirectory(root_dir, name)]
        return list(names)
def get_file(database_path, filename):
    """Return the lines of *filename* (root-relative) from the zip archive.

    Args:
        database_path: path to the repo zip (GitHub layout, single root dir).
        filename: root-relative file path; a leading '/' is tolerated.

    Returns:
        list of decoded str lines with trailing whitespace stripped, or []
        when the file is not present in the archive.
    """
    filename = strip_leading_dash(filename)
    with zipfile.ZipFile(database_path) as z:
        for entry in z.infolist():
            if entry.is_dir():
                continue
            if remove_root_dir(entry.filename) == filename:
                with z.open(entry, 'r') as f:
                    # Bug fix: the original returned bytes lines; callers
                    # interpolate them into f-strings, which rendered each
                    # line as "b'...'" repr noise. Decode to str here
                    # (UTF-8, undecodable bytes replaced).
                    return [line.decode('utf-8', errors='replace').rstrip()
                            for line in f]
    return []
@mcp.tool()
async def fetch_file_content(
    owner: str = Field(description="The owner of the repository"),
    repo: str = Field(description="The name of the repository"),
    path: str = Field(description="The path to the file in the repository"))-> str:
    """
    Fetch the content of a file from a local GitHub repository.
    """
    # Locate the cached archive and refuse anything escaping LOCAL_GH_DIR.
    source_path = sanitize_file_path(
        Path(f"{LOCAL_GH_DIR}/{owner}/{repo}.zip"), [LOCAL_GH_DIR])
    if not source_path or not source_path.exists():
        return f"Invalid {owner} and {repo}. Check that the input is correct or try to fetch the repo from gh first."
    lines = get_file(source_path, path)
    if not lines:
        return f"Unable to find file {path} in {owner}/{repo}"
    # Prefix each line with its 1-based number.
    return "\n".join(f"{num}: {text}" for num, text in enumerate(lines, start=1))
@mcp.tool()
async def get_file_lines(
    owner: str = Field(description="The owner of the repository"),
    repo: str = Field(description="The name of the repository"),
    path: str = Field(description="The path to the file in the repository"),
    start_line: int = Field(description="The starting line number to fetch from the file", default=1),
    length: int = Field(description="The number of lines to fetch from the file", default=10)) -> str:
    """Fetch a range of lines from a file in a local GitHub repository.

    Returns lines start_line .. start_line+length-1, each prefixed with its
    1-based line number. Invalid inputs are clamped: start_line < 1 becomes
    1, length < 1 becomes the default of 10.
    """
    source_path = Path(f"{LOCAL_GH_DIR}/{owner}/{repo}.zip")
    source_path = sanitize_file_path(source_path, [LOCAL_GH_DIR])
    if not source_path or not source_path.exists():
        return f"Invalid {owner} and {repo}. Check that the input is correct or try to fetch the repo from gh first."
    lines = get_file(source_path, path)
    if not lines:
        # Distinguish a missing/empty file from an out-of-range request,
        # consistent with fetch_file_content's error message.
        return f"Unable to find file {path} in {owner}/{repo}"
    if start_line < 1:
        start_line = 1
    if length < 1:
        length = 10
    lines = lines[start_line-1:start_line-1+length]
    if not lines:
        return f"No lines found in the range {start_line} to {start_line + length - 1} in {path}."
    return "\n".join([f"{i+start_line}: {line}" for i, line in enumerate(lines)])
@mcp.tool()
async def list_files(
    owner: str = Field(description="The owner of the repository"),
    repo: str = Field(description="The name of the repository"),
    path: str = Field(description="The path to the directory in the repository")) -> str:
    """
    Recursively list the files of a directory from a local GitHub repository.
    """
    # Locate the cached archive and refuse anything escaping LOCAL_GH_DIR.
    archive = sanitize_file_path(
        Path(f"{LOCAL_GH_DIR}/{owner}/{repo}.zip"), [LOCAL_GH_DIR])
    if not archive or not archive.exists():
        return f"Invalid {owner} and {repo}. Check that the input is correct or try to fetch the repo from gh first."
    return json.dumps(_list_files(archive, path), indent=2)
@mcp.tool()
async def search_repo(
    owner: str = Field(description="The owner of the repository"),
    repo: str = Field(description="The name of the repository"),
    search_term: str = Field(description="The term to search within the repo."),
    directory: str = Field(description="The directory or file to restrict the search, if not provided, the whole repo is searched", default = '')
    ):
    """
    Search for the search term in the repository or a subdirectory/file in the repository.

    Returns a JSON list of {owner, repo, path, lines} hits, where "lines"
    is the list of 1-based line numbers containing the search term.
    """
    source_path = Path(f"{LOCAL_GH_DIR}/{owner}/{repo}.zip")
    source_path = sanitize_file_path(source_path, [LOCAL_GH_DIR])
    if not source_path or not source_path.exists():
        return f"Invalid {owner} and {repo}. Check that the input is correct or try to fetch the repo from gh first."
    # (Removed an unreachable second source_path.exists() check: the guard
    # above already returns for missing archives.)
    results = search_zipfile(source_path, search_term, directory)
    out = [{"owner": owner, "repo": repo, "path": filename, "lines": line_numbers}
           for filename, line_numbers in results.items()]
    return json.dumps(out, indent=2)
if __name__ == "__main__":
    # Start the FastMCP server; banner output suppressed.
    mcp.run(show_banner=False)