Skip to content

Commit 19df6ab

Browse files
authored
[Feature:Plagiarism] Add size limit to concatenation (#49)
* Add size limit check * print total size concatenated * Tweak print statements
1 parent 91ea08a commit 19df6ab

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

bin/concatenate_all.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@
66

77
import argparse
88
import os
9+
import sys
910
import json
1011
import time
12+
import humanize
1113
import fnmatch
1214
from pathlib import Path
1315

1416
IGNORED_FILES = [
1517
".submit.timestamp"
1618
]
19+
MAX_CONCAT_SIZE = 1e9
1720

1821

1922
# returns a string containing the contents of the files which match the regex in the specified dir
@@ -41,6 +44,12 @@ def getConcatFilesInDir(input_dir, regex_patterns):
4144
return result
4245

4346

47+
def checkTotalSize(total_concat):
    """Stop the program once the running concatenation total exceeds the cap.

    total_concat is the running size tally kept by the caller; it is compared
    against the module-level MAX_CONCAT_SIZE. While the tally is at or below
    the cap this function returns None and has no other effect.

    Raises SystemExit with a human-readable message naming the cap when the
    tally is strictly greater than MAX_CONCAT_SIZE.
    """
    # Guard clause: nothing to do while we are still within the allowance.
    if not total_concat > MAX_CONCAT_SIZE:
        return

    # Render the cap (not the current tally) in human-friendly units.
    limit_text = humanize.naturalsize(MAX_CONCAT_SIZE)
    raise SystemExit(f"ERROR! exceeded {limit_text} of concatenated files allowed")
51+
52+
4453
def parse_args():
4554
parser = argparse.ArgumentParser(description="")
4655
parser.add_argument("basepath")
@@ -123,6 +132,7 @@ def main():
123132

124133
# ==========================================================================
125134
# loop through and concatenate the selected files for each user in this gradeable
135+
total_concat = 0
126136

127137
for dir in regex_dirs:
128138
gradeable_path = os.path.join(args.datapath, semester, course, dir, gradeable)
@@ -159,6 +169,9 @@ def main():
159169
with open(output_file_path, "a") as output_file:
160170
concatenated_contents = getConcatFilesInDir(version_path, regex_patterns)
161171
output_file.write(concatenated_contents)
172+
total_concat += sys.getsizeof(concatenated_contents)
173+
174+
checkTotalSize(total_concat)
162175

163176
# ==========================================================================
164177
# loop over all of the other prior term gradeables and concatenate their submissions
@@ -203,18 +216,26 @@ def main():
203216
other_concatenated_contents = getConcatFilesInDir(other_version_path,
204217
regex_patterns)
205218
other_output_file.write(other_concatenated_contents)
219+
total_concat += sys.getsizeof(other_concatenated_contents)
220+
221+
checkTotalSize(total_concat)
206222

207223
# ==========================================================================
208224
# iterate over all of the created submissions, checking to see if they are empty
209225
# and adding a message to the top if so (to differentiate empty files from errors in the UI)
226+
227+
no_files_match_error = "ERROR! No files matched provided regex in selected directories"
228+
210229
for user in os.listdir(os.path.join(args.basepath, "users")):
211230
user_path = os.path.join(args.basepath, "users", user)
212231
for version in os.listdir(user_path):
213232
version_path = os.path.join(user_path, version)
214233
my_concatenated_file = os.path.join(version_path, "submission.concatenated")
215234
with open(my_concatenated_file, "r+") as my_cf:
216235
if my_cf.read() == "":
217-
my_cf.write("ERROR! No files matched provided regex in selected directories")
236+
my_cf.write(no_files_match_error)
237+
total_concat += sys.getsizeof(no_files_match_error)
238+
checkTotalSize(total_concat)
218239

219240
# do the same for the other gradeables
220241
for other_gradeable in prior_term_gradeables:
@@ -228,19 +249,24 @@ def main():
228249
my_concatenated_file = os.path.join(other_version_path, "submission.concatenated")
229250
with open(my_concatenated_file, "r+") as my_cf:
230251
if my_cf.read() == "":
231-
my_cf.write("ERROR! No files matched provided regex in"
232-
"selected directories")
252+
my_cf.write(no_files_match_error)
253+
total_concat += sys.getsizeof(no_files_match_error)
254+
checkTotalSize(total_concat)
233255

234256
# ==========================================================================
235257
# concatenate provided code
236258
with open(os.path.join(args.basepath, "provided_code",
237259
"submission.concatenated"), "w") as file:
238260
provided_code_files = os.path.join(args.basepath, "provided_code", "files")
239-
file.write(getConcatFilesInDir(provided_code_files, regex_patterns))
261+
provided_concatenated_files = getConcatFilesInDir(provided_code_files, regex_patterns)
262+
file.write(provided_concatenated_files)
263+
total_concat += sys.getsizeof(provided_concatenated_files)
264+
checkTotalSize(total_concat)
240265

241266
# ==========================================================================
242267
end_time = time.time()
243-
print("done in " + "%.0f" % (end_time - start_time) + " seconds")
268+
print("done in " + "%.0f" % (end_time - start_time) + " seconds,",
269+
humanize.naturalsize(total_concat) + " concatenated")
244270

245271

246272
if __name__ == "__main__":

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,6 @@ clang==11.0
99

1010
# Java tokenization
1111
javac_parser==1.0.0
12+
13+
# turn data into human readable format
14+
humanize==3.11.0

0 commit comments

Comments
 (0)