Skip to content

Commit a22f39c

Browse files
"Rerun and archive sandbox" button in AWS (#1456)
Co-authored-by: Gregor Eesmaa <[email protected]>
1 parent 2cd04c4 commit a22f39c

File tree

15 files changed

+315
-65
lines changed

15 files changed

+315
-65
lines changed

cms/db/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181

8282
# Instantiate or import these objects.
8383

84-
version = 45
84+
version = 46
8585

8686
engine = create_engine(config.database, echo=config.database_debug,
8787
pool_timeout=60, pool_recycle=120)

cms/db/submission.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,11 @@ class SubmissionResult(Base):
362362
compilation_shard: int | None = Column(
363363
Integer,
364364
nullable=True)
365-
compilation_sandbox: str | None = Column(
366-
Unicode,
365+
compilation_sandbox_paths: list[str] | None = Column(
366+
ARRAY(Unicode),
367+
nullable=True)
368+
compilation_sandbox_digests: list[str] | None = Column(
369+
ARRAY(String),
367370
nullable=True)
368371

369372
# Evaluation outcome (can be None = yet to evaluate, "ok" =
@@ -594,16 +597,22 @@ def invalidate_compilation(self):
594597
self.compilation_memory = None
595598
self.compilation_shard = None
596599
self.compilation_sandbox = None
600+
self.compilation_sandbox_digests = []
597601
self.executables = {}
598602

599-
def invalidate_evaluation(self):
603+
def invalidate_evaluation(self, testcase_id: int | None = None):
600604
"""Blank the evaluation outcomes and the score.
601605
606+
testcase_id: ID of testcase to invalidate, or None to invalidate all.
607+
602608
"""
603609
self.invalidate_score()
604610
self.evaluation_outcome = None
605611
self.evaluation_tries = 0
606-
self.evaluations = []
612+
if testcase_id:
613+
self.evaluations = [e for e in self.evaluations if e.testcase_id != testcase_id]
614+
else:
615+
self.evaluations = []
607616

608617
def invalidate_score(self):
609618
"""Blank the score.
@@ -774,8 +783,11 @@ class Evaluation(Base):
774783
evaluation_shard: int | None = Column(
775784
Integer,
776785
nullable=True)
777-
evaluation_sandbox: str | None = Column(
778-
Unicode,
786+
evaluation_sandbox_paths: list[str] | None = Column(
787+
ARRAY(Unicode),
788+
nullable=True)
789+
evaluation_sandbox_digests: list[str] | None = Column(
790+
ARRAY(String),
779791
nullable=True)
780792

781793
@property

cms/db/usertest.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,11 @@ class UserTestResult(Base):
312312
compilation_shard: int | None = Column(
313313
Integer,
314314
nullable=True)
315-
compilation_sandbox: str | None = Column(
316-
String,
315+
compilation_sandbox_paths: list[str] | None = Column(
316+
ARRAY(Unicode),
317+
nullable=True)
318+
compilation_sandbox_digests: list[str] | None = Column(
319+
ARRAY(String),
317320
nullable=True)
318321

319322
# Evaluation outcome (can be None = yet to evaluate, "ok" =
@@ -352,8 +355,11 @@ class UserTestResult(Base):
352355
evaluation_shard: int | None = Column(
353356
Integer,
354357
nullable=True)
355-
evaluation_sandbox: str | None = Column(
356-
String,
358+
evaluation_sandbox_paths: list[str] | None = Column(
359+
ARRAY(Unicode),
360+
nullable=True)
361+
evaluation_sandbox_digests: list[str] | None = Column(
362+
ARRAY(String),
357363
nullable=True)
358364

359365
# These one-to-many relationships are the reversed directions of

cms/grading/Job.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,11 @@ def __init__(
8585
task_type_parameters: object = None,
8686
language: str | None = None,
8787
multithreaded_sandbox: bool = False,
88+
archive_sandbox: bool = False,
8889
shard: int | None = None,
8990
keep_sandbox: bool = False,
9091
sandboxes: list[str] | None = None,
92+
sandbox_digests: dict[str, str] | None = None,
9193
info: str | None = None,
9294
success: bool | None = None,
9395
text: list[str] | None = None,
@@ -104,12 +106,15 @@ def __init__(
104106
language: the language of the submission / user test.
105107
multithreaded_sandbox: whether the sandbox should
106108
allow multithreading.
109+
archive_sandbox: whether the sandbox is to be archived.
107110
shard: the shard of the Worker completing this job.
108111
keep_sandbox: whether to forcefully keep the sandbox,
109112
even if other conditions (the config, the sandbox status)
110113
don't warrant it.
111114
sandboxes: the paths of the sandboxes used in
112115
the Worker during the execution of the job.
116+
sandbox_digests: the digests of the sandbox archives used to
117+
debug solutions. (map of sandbox path -> archive digest)
113118
info: a human readable description of the job.
114119
success: whether the job succeeded.
115120
text: description of the outcome of the job,
@@ -125,6 +130,8 @@ def __init__(
125130
task_type = ""
126131
if sandboxes is None:
127132
sandboxes = []
133+
if sandbox_digests is None:
134+
sandbox_digests = {}
128135
if info is None:
129136
info = ""
130137
if files is None:
@@ -139,9 +146,11 @@ def __init__(
139146
self.task_type_parameters = task_type_parameters
140147
self.language = language
141148
self.multithreaded_sandbox = multithreaded_sandbox
149+
self.archive_sandbox = archive_sandbox
142150
self.shard = shard
143151
self.keep_sandbox = keep_sandbox
144152
self.sandboxes = sandboxes
153+
self.sandbox_digests = sandbox_digests
145154
self.info = info
146155

147156
self.success = success
@@ -161,9 +170,11 @@ def export_to_dict(self) -> dict:
161170
'task_type_parameters': self.task_type_parameters,
162171
'language': self.language,
163172
'multithreaded_sandbox': self.multithreaded_sandbox,
173+
'archive_sandbox': self.archive_sandbox,
164174
'shard': self.shard,
165175
'keep_sandbox': self.keep_sandbox,
166176
'sandboxes': self.sandboxes,
177+
'sandbox_digests': self.sandbox_digests,
167178
'info': self.info,
168179
'success': self.success,
169180
'text': self.text,
@@ -253,6 +264,26 @@ def from_operation(
253264
job = EvaluationJob.from_user_test(operation, object_, dataset)
254265
return job
255266

267+
def get_sandbox_digest_list(self) -> list[str] | None:
268+
"""
269+
Convert self.sandbox_digests into a list, where each index matches the
270+
corresponding index in self.sandboxes.
271+
"""
272+
if not self.sandbox_digests:
273+
return None
274+
res: list[str | None] = [None] * len(self.sandboxes)
275+
for k,v in self.sandbox_digests.items():
276+
if k in self.sandboxes:
277+
index = self.sandboxes.index(k)
278+
res[index] = v
279+
else:
280+
logger.warning("Have digest for unknown sandbox %s", k)
281+
if None in res:
282+
ind = res.index(None)
283+
logger.warning("Sandbox %s was not archived", self.sandboxes[ind])
284+
return None
285+
return res
286+
256287

257288
class CompilationJob(Job):
258289
"""Job representing a compilation.
@@ -274,9 +305,11 @@ def __init__(
274305
shard: int | None = None,
275306
keep_sandbox: bool = False,
276307
sandboxes: list[str] | None = None,
308+
sandbox_digests: dict[str, str] | None = None,
277309
info: str | None = None,
278310
language: str | None = None,
279311
multithreaded_sandbox: bool = False,
312+
archive_sandbox: bool = False,
280313
files: dict[str, File] | None = None,
281314
managers: dict[str, Manager] | None = None,
282315
success: bool | None = None,
@@ -296,9 +329,9 @@ def __init__(
296329
"""
297330

298331
Job.__init__(self, operation, task_type, task_type_parameters,
299-
language, multithreaded_sandbox,
300-
shard, keep_sandbox, sandboxes, info, success, text,
301-
files, managers, executables)
332+
language, multithreaded_sandbox, archive_sandbox,
333+
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
334+
text, files, managers, executables)
302335
self.compilation_success = compilation_success
303336
self.plus = plus
304337

@@ -341,6 +374,7 @@ def from_submission(
341374
task_type_parameters=dataset.task_type_parameters,
342375
language=submission.language,
343376
multithreaded_sandbox=multithreaded,
377+
archive_sandbox=operation.archive_sandbox,
344378
files=dict(submission.files),
345379
managers=dict(dataset.managers),
346380
info="compile submission %d" % (submission.id)
@@ -367,7 +401,8 @@ def to_submission(self, sr: SubmissionResult):
367401
self.plus.get('execution_wall_clock_time')
368402
sr.compilation_memory = self.plus.get('execution_memory')
369403
sr.compilation_shard = self.shard
370-
sr.compilation_sandbox = ":".join(self.sandboxes)
404+
sr.compilation_sandbox_paths = self.sandboxes
405+
sr.compilation_sandbox_digests = self.get_sandbox_digest_list()
371406
for executable in self.executables.values():
372407
sr.executables.set(executable)
373408

@@ -431,6 +466,7 @@ def from_user_test(
431466
task_type_parameters=dataset.task_type_parameters,
432467
language=user_test.language,
433468
multithreaded_sandbox=multithreaded,
469+
archive_sandbox=operation.archive_sandbox,
434470
files=dict(user_test.files),
435471
managers=managers,
436472
info="compile user test %d" % (user_test.id)
@@ -457,7 +493,8 @@ def to_user_test(self, ur: UserTestResult):
457493
self.plus.get('execution_wall_clock_time')
458494
ur.compilation_memory = self.plus.get('execution_memory')
459495
ur.compilation_shard = self.shard
460-
ur.compilation_sandbox = ":".join(self.sandboxes)
496+
ur.compilation_sandbox_paths = self.sandboxes
497+
ur.compilation_sandbox_digests = self.get_sandbox_digest_list()
461498
for executable in self.executables.values():
462499
u_executable = UserTestExecutable(
463500
executable.filename, executable.digest)
@@ -485,9 +522,11 @@ def __init__(
485522
shard: int | None = None,
486523
keep_sandbox: bool = False,
487524
sandboxes: list[str] | None = None,
525+
sandbox_digests: dict[str, str] | None = None,
488526
info: str | None = None,
489527
language: str | None = None,
490528
multithreaded_sandbox: bool = False,
529+
archive_sandbox: bool = False,
491530
files: dict[str, File] | None = None,
492531
managers: dict[str, Manager] | None = None,
493532
executables: dict[str, Executable] | None = None,
@@ -526,9 +565,9 @@ def __init__(
526565
527566
"""
528567
Job.__init__(self, operation, task_type, task_type_parameters,
529-
language, multithreaded_sandbox,
530-
shard, keep_sandbox, sandboxes, info, success, text,
531-
files, managers, executables)
568+
language, multithreaded_sandbox, archive_sandbox,
569+
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
570+
text, files, managers, executables)
532571
self.input = input
533572
self.output = output
534573
self.time_limit = time_limit
@@ -592,6 +631,7 @@ def from_submission(
592631
task_type_parameters=dataset.task_type_parameters,
593632
language=submission.language,
594633
multithreaded_sandbox=multithreaded,
634+
archive_sandbox=operation.archive_sandbox,
595635
files=dict(submission.files),
596636
managers=dict(dataset.managers),
597637
executables=dict(submission_result.executables),
@@ -619,7 +659,8 @@ def to_submission(self, sr: SubmissionResult):
619659
'execution_wall_clock_time'),
620660
execution_memory=self.plus.get('execution_memory'),
621661
evaluation_shard=self.shard,
622-
evaluation_sandbox=":".join(self.sandboxes),
662+
evaluation_sandbox_paths=self.sandboxes,
663+
evaluation_sandbox_digests=self.get_sandbox_digest_list(),
623664
testcase=sr.dataset.testcases[self.operation.testcase_codename])]
624665

625666
@staticmethod
@@ -674,6 +715,7 @@ def from_user_test(
674715
task_type_parameters=dataset.task_type_parameters,
675716
language=user_test.language,
676717
multithreaded_sandbox=multithreaded,
718+
archive_sandbox=operation.archive_sandbox,
677719
files=dict(user_test.files),
678720
managers=managers,
679721
executables=dict(user_test_result.executables),
@@ -704,7 +746,8 @@ def to_user_test(self, ur: UserTestResult):
704746
self.plus.get('execution_wall_clock_time')
705747
ur.execution_memory = self.plus.get('execution_memory')
706748
ur.evaluation_shard = self.shard
707-
ur.evaluation_sandbox = ":".join(self.sandboxes)
749+
ur.evaluation_sandbox_paths = self.sandboxes
750+
ur.evaluation_sandbox_digests = self.get_sandbox_digest_list()
708751
ur.output = self.user_output
709752

710753

cms/grading/Sandbox.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import stat
2828
import tempfile
2929
import time
30+
import tarfile
3031
from abc import ABCMeta, abstractmethod
3132
from functools import wraps, partial
3233
import typing
@@ -532,10 +533,32 @@ def cleanup(self, delete: bool = False):
532533
533534
delete: if True, also delete get_root_path() and everything it
534535
contains.
535-
536536
"""
537537
pass
538538

539+
def archive(self) -> str | None:
540+
"""Archive the directory where the sandbox operated.
541+
542+
Stores the archived sandbox in the file cacher and returns its digest.
543+
Returns None if archiving failed.
544+
545+
"""
546+
logger.info("Archiving sandbox in %s.", self.get_root_path())
547+
548+
with tempfile.TemporaryFile(dir=self.temp_dir) as sandbox_archive:
549+
# Archive the working directory
550+
content_path = self.get_root_path()
551+
try:
552+
with tarfile.open(fileobj=sandbox_archive, mode='w:gz') as tar_file:
553+
tar_file.add(content_path, os.path.basename(content_path))
554+
except Exception:
555+
logger.warning("Failed to archive sandbox", exc_info=True)
556+
return None
557+
558+
# Put archive to FS
559+
sandbox_archive.seek(0)
560+
return self.file_cacher.put_file_from_fobj(sandbox_archive, "Sandbox %s" % self.get_root_path())
561+
539562

540563
class StupidSandbox(SandboxBase):
541564
"""A stupid sandbox implementation. It has very few features and

cms/grading/tasktypes/Batch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def _do_compile(self, job, file_cacher):
257257
Executable(executable_filename, digest)
258258

259259
# Cleanup.
260-
delete_sandbox(sandbox, job.success, job.keep_sandbox)
260+
delete_sandbox(sandbox, job)
261261

262262
def compile(self, job, file_cacher):
263263
"""See TaskType.compile."""
@@ -380,7 +380,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
380380
job.plus = stats
381381

382382
if sandbox is not None:
383-
delete_sandbox(sandbox, job.success, job.keep_sandbox)
383+
delete_sandbox(sandbox, job)
384384

385385
def evaluate(self, job, file_cacher):
386386
"""See TaskType.evaluate."""

cms/grading/tasktypes/Communication.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def compile(self, job, file_cacher):
242242
Executable(executable_filename, digest)
243243

244244
# Cleanup.
245-
delete_sandbox(sandbox, job.success, job.keep_sandbox)
245+
delete_sandbox(sandbox, job)
246246

247247
def evaluate(self, job, file_cacher):
248248
"""See TaskType.evaluate."""
@@ -434,9 +434,9 @@ def evaluate(self, job, file_cacher):
434434
job.text = text
435435
job.plus = stats_user
436436

437-
delete_sandbox(sandbox_mgr, job.success, job.keep_sandbox)
437+
delete_sandbox(sandbox_mgr, job)
438438
for s in sandbox_user:
439-
delete_sandbox(s, job.success, job.keep_sandbox)
439+
delete_sandbox(s, job)
440440
if job.success and not config.keep_sandbox and not job.keep_sandbox:
441441
for d in fifo_dir:
442442
rmtree(d)

cms/grading/tasktypes/TwoSteps.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def compile(self, job, file_cacher):
210210
Executable(executable_filename, digest)
211211

212212
# Cleanup
213-
delete_sandbox(sandbox, job.success, job.keep_sandbox)
213+
delete_sandbox(sandbox, job)
214214

215215
def evaluate(self, job, file_cacher):
216216
"""See TaskType.evaluate."""
@@ -346,5 +346,5 @@ def evaluate(self, job, file_cacher):
346346
job.text = text
347347
job.plus = stats
348348

349-
delete_sandbox(first_sandbox, job.success, job.keep_sandbox)
350-
delete_sandbox(second_sandbox, job.success, job.keep_sandbox)
349+
delete_sandbox(first_sandbox, job)
350+
delete_sandbox(second_sandbox, job)

0 commit comments

Comments
 (0)