Skip to content

Commit eb64d2a

Browse files
committed
Add config/.ignore file, which is parsed for file patterns to ignore for upload
It currently only includes patterns for some Python bytecode files
1 parent d3f906f commit eb64d2a

File tree

3 files changed

+37
-7
lines changed

3 files changed

+37
-7
lines changed

config/.ignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
__pycache__/
2+
*.py[cod]
3+
*$py.class
4+
*.so

snekbox/memfs.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
import logging
55
import warnings
66
import weakref
7-
from collections.abc import Generator
7+
from collections.abc import Generator, Sequence
88
from contextlib import suppress
9+
from fnmatch import fnmatch
910
from pathlib import Path
1011
from types import TracebackType
1112
from typing import Type
@@ -124,6 +125,7 @@ def files(
124125
self,
125126
limit: int,
126127
pattern: str = "**/*",
128+
ignores: Sequence[str] = (),
127129
exclude_files: dict[Path, float] | None = None,
128130
) -> Generator[FileAttachment, None, None]:
129131
"""
@@ -132,47 +134,62 @@ def files(
132134
Args:
133135
limit: The maximum number of files to parse.
134136
pattern: The glob pattern to match files against.
137+
ignores: A sequence of fnmatch patterns to ignore.
135138
exclude_files: A dict of Paths and last modified times.
136139
Files will be excluded if their last modified time
137140
is equal to the provided value.
138141
"""
139142
count = 0
140143
for file in self.output.rglob(pattern):
144+
if any(
145+
fnmatch(str(file.relative_to(self.home)), match_pattern := ignore_pattern)
146+
for ignore_pattern in ignores
147+
):
148+
log.info(f"Ignoring {file.name!r} as it matches {match_pattern!r}")
149+
continue
150+
141151
if exclude_files and (orig_time := exclude_files.get(file)):
142152
new_time = file.stat().st_mtime
143153
log.info(f"Checking {file.name} ({orig_time=}, {new_time=})")
144154
if file.stat().st_mtime == orig_time:
145-
log.info(f"Skipping {file.name} as it has not been modified")
155+
log.info(f"Skipping {file.name!r} as it has not been modified")
146156
continue
157+
147158
if count > limit:
148159
log.info(f"Max attachments {limit} reached, skipping remaining files")
149160
break
161+
150162
if file.is_file():
151163
count += 1
152-
log.info(f"Found file {file!s}")
164+
log.info(f"Found valid file for upload {file.name!r}")
153165
yield FileAttachment.from_path(file, relative_to=self.output)
154166

155167
def files_list(
156168
self,
157169
limit: int,
158170
pattern: str,
159-
preload_dict: bool = False,
171+
ignores: Sequence[str] = (),
160172
exclude_files: dict[Path, float] | None = None,
173+
preload_dict: bool = False,
161174
) -> list[FileAttachment]:
162175
"""
163176
Return a sorted list of file paths within the output directory.
164177
165178
Args:
166179
limit: The maximum number of files to parse.
167180
pattern: The glob pattern to match files against.
168-
preload_dict: Whether to preload as_dict property data.
181+
ignores: A sequence of fnmatch patterns to ignore.
169182
exclude_files: A dict of Paths and last modified times.
170183
Files will be excluded if their last modified time
171184
is equal to the provided value.
185+
preload_dict: Whether to preload as_dict property data.
172186
Returns:
173187
List of FileAttachments sorted lexically by path name.
174188
"""
175-
res = sorted(self.files(limit, pattern, exclude_files), key=lambda f: f.path)
189+
res = sorted(
190+
self.files(limit=limit, pattern=pattern, ignores=ignores, exclude_files=exclude_files),
191+
key=lambda f: f.path,
192+
)
176193
if preload_dict:
177194
for file in res:
178195
# Loads the cached property as attribute

snekbox/nsjail.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def __init__(
5858
files_limit: int | None = 100,
5959
files_timeout: float | None = 8,
6060
files_pattern: str = "**/[!_]*",
61+
files_ignore_path: str = "./config/.ignore",
6162
):
6263
"""
6364
Initialize NsJail.
@@ -74,17 +75,21 @@ def __init__(
7475
files_limit: Maximum number of output files to parse.
7576
files_timeout: Maximum time in seconds to wait for output files to be read.
7677
files_pattern: Pattern to match files to attach within the output directory.
78+
files_ignore_path: Path to a file containing a gitignore-like list of file
79+
patterns to ignore for upload.
7780
"""
7881
self.nsjail_path = nsjail_path
7982
self.config_path = config_path
8083
self.max_output_size = max_output_size
8184
self.read_chunk_size = read_chunk_size
85+
8286
self.memfs_instance_size = memfs_instance_size
8387
self.memfs_home = memfs_home
8488
self.memfs_output = memfs_output
8589
self.files_limit = files_limit
8690
self.files_timeout = files_timeout
8791
self.files_pattern = files_pattern
92+
self.files_ignores = Path(files_ignore_path).read_text().splitlines()
8893

8994
self.config = self._read_config(config_path)
9095
self.cgroup_version = utils.cgroup.init(self.config)
@@ -269,7 +274,11 @@ def python3(
269274
attachments = timed(
270275
MemFS.files_list,
271276
(fs, self.files_limit, self.files_pattern),
272-
{"preload_dict": True, "exclude_files": files_written},
277+
{
278+
"ignores": self.files_ignores,
279+
"preload_dict": True,
280+
"exclude_files": files_written,
281+
},
273282
timeout=self.files_timeout,
274283
)
275284
log.info(f"Found {len(attachments)} files.")

0 commit comments

Comments
 (0)