cms-dev
diff --git a/‎cms/db/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎cms/db/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cms/db/submission.py‎
Lines changed: 18 additions & 6 deletions b/‎cms/db/submission.py‎
Lines changed: 18 additions & 6 deletions
diff --git a/‎cms/db/usertest.py‎
Lines changed: 10 additions & 4 deletions b/‎cms/db/usertest.py‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎cms/grading/Job.py‎
Lines changed: 53 additions & 10 deletions b/‎cms/grading/Job.py‎
Lines changed: 53 additions & 10 deletions
diff --git a/‎cms/grading/Sandbox.py‎
Lines changed: 24 additions & 1 deletion b/‎cms/grading/Sandbox.py‎
Lines changed: 24 additions & 1 deletion
diff --git a/‎cms/grading/tasktypes/Batch.py‎
Lines changed: 2 additions & 2 deletions b/‎cms/grading/tasktypes/Batch.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎cms/grading/tasktypes/Communication.py‎
Lines changed: 3 additions & 3 deletions b/‎cms/grading/tasktypes/Communication.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎cms/grading/tasktypes/TwoSteps.py‎
Lines changed: 3 additions & 3 deletions b/‎cms/grading/tasktypes/TwoSteps.py‎
Lines changed: 3 additions & 3 deletions
@@ -81,7 +81,7 @@
 
 # Instantiate or import these objects.
 
-version = 45
+version = 46
 
 engine = create_engine(config.database, echo=config.database_debug,
                        pool_timeout=60, pool_recycle=120)
 
@@ -362,8 +362,11 @@ class SubmissionResult(Base):
     compilation_shard: int | None = Column(
         Integer,
         nullable=True)
-    compilation_sandbox: str | None = Column(
-        Unicode,
+    compilation_sandbox_paths: list[str] | None = Column(
+        ARRAY(Unicode),
+        nullable=True)
+    compilation_sandbox_digests: list[str] | None = Column(
+        ARRAY(String),
         nullable=True)
 
     # Evaluation outcome (can be None = yet to evaluate, "ok" =
@@ -594,16 +597,22 @@ def invalidate_compilation(self):
         self.compilation_memory = None
         self.compilation_shard = None
         self.compilation_sandbox = None
+        self.compilation_sandbox_digests = []
         self.executables = {}
 
-    def invalidate_evaluation(self):
+    def invalidate_evaluation(self, testcase_id: int | None = None):
         """Blank the evaluation outcomes and the score.
 
+        testcase_id: ID of testcase to invalidate, or None to invalidate all.
+
         """
         self.invalidate_score()
         self.evaluation_outcome = None
         self.evaluation_tries = 0
-        self.evaluations = []
+        # Compare against None explicitly: a plain truthiness test would
+        # treat a falsy ID (0) as a request to drop every evaluation.
+        if testcase_id is not None:
+            self.evaluations = [
+                e for e in self.evaluations if e.testcase_id != testcase_id
+            ]
+        else:
+            self.evaluations = []
 
     def invalidate_score(self):
         """Blank the score.
@@ -774,8 +783,11 @@ class Evaluation(Base):
     evaluation_shard: int | None = Column(
         Integer,
         nullable=True)
-    evaluation_sandbox: str | None = Column(
-        Unicode,
+    evaluation_sandbox_paths: list[str] | None = Column(
+        ARRAY(Unicode),
+        nullable=True)
+    evaluation_sandbox_digests: list[str] | None = Column(
+        ARRAY(String),
         nullable=True)
 
     @property
 
@@ -312,8 +312,11 @@ class UserTestResult(Base):
     compilation_shard: int | None = Column(
         Integer,
         nullable=True)
-    compilation_sandbox: str | None = Column(
-        String,
+    compilation_sandbox_paths: list[str] | None = Column(
+        ARRAY(Unicode),
+        nullable=True)
+    compilation_sandbox_digests: list[str] | None = Column(
+        ARRAY(String),
         nullable=True)
 
     # Evaluation outcome (can be None = yet to evaluate, "ok" =
@@ -352,8 +355,11 @@ class UserTestResult(Base):
     evaluation_shard: int | None = Column(
         Integer,
         nullable=True)
-    evaluation_sandbox: str | None = Column(
-        String,
+    evaluation_sandbox_paths: list[str] | None = Column(
+        ARRAY(Unicode),
+        nullable=True)
+    evaluation_sandbox_digests: list[str] | None = Column(
+        ARRAY(String),
         nullable=True)
 
     # These one-to-many relationships are the reversed directions of
 
@@ -85,9 +85,11 @@ def __init__(
         task_type_parameters: object = None,
         language: str | None = None,
         multithreaded_sandbox: bool = False,
+        archive_sandbox: bool = False,
         shard: int | None = None,
         keep_sandbox: bool = False,
         sandboxes: list[str] | None = None,
+        sandbox_digests: dict[str, str] | None = None,
         info: str | None = None,
         success: bool | None = None,
         text: list[str] | None = None,
@@ -104,12 +106,15 @@ def __init__(
         language: the language of the submission / user test.
         multithreaded_sandbox: whether the sandbox should
             allow multithreading.
+        archive_sandbox: whether the sandbox is to be archived.
         shard: the shard of the Worker completing this job.
         keep_sandbox: whether to forcefully keep the sandbox,
             even if other conditions (the config, the sandbox status)
             don't warrant it.
         sandboxes: the paths of the sandboxes used in
             the Worker during the execution of the job.
+        sandbox_digests: the digests of the sandbox archives used to
+            debug solutions. (map of sandbox path -> archive digest)
         info: a human readable description of the job.
         success: whether the job succeeded.
         text: description of the outcome of the job,
@@ -125,6 +130,8 @@ def __init__(
             task_type = ""
         if sandboxes is None:
             sandboxes = []
+        if sandbox_digests is None:
+            sandbox_digests = {}
         if info is None:
             info = ""
         if files is None:
@@ -139,9 +146,11 @@ def __init__(
         self.task_type_parameters = task_type_parameters
         self.language = language
         self.multithreaded_sandbox = multithreaded_sandbox
+        self.archive_sandbox = archive_sandbox
         self.shard = shard
         self.keep_sandbox = keep_sandbox
         self.sandboxes = sandboxes
+        self.sandbox_digests = sandbox_digests
         self.info = info
 
         self.success = success
@@ -161,9 +170,11 @@ def export_to_dict(self) -> dict:
             'task_type_parameters': self.task_type_parameters,
             'language': self.language,
             'multithreaded_sandbox': self.multithreaded_sandbox,
+            'archive_sandbox': self.archive_sandbox,
             'shard': self.shard,
             'keep_sandbox': self.keep_sandbox,
             'sandboxes': self.sandboxes,
+            'sandbox_digests': self.sandbox_digests,
             'info': self.info,
             'success': self.success,
             'text': self.text,
@@ -253,6 +264,26 @@ def from_operation(
             job = EvaluationJob.from_user_test(operation, object_, dataset)
         return job
 
+    def get_sandbox_digest_list(self) -> list[str] | None:
+        """Return the archive digests ordered like self.sandboxes.
+
+        return: a list whose i-th element is the digest recorded in
+            self.sandbox_digests for self.sandboxes[i]; None if no digests
+            were recorded at all, or if at least one sandbox has no digest.
+
+        """
+        if not self.sandbox_digests:
+            return None
+
+        # A digest keyed by a path that is not one of our sandboxes has no
+        # slot in the result; report it instead of dropping it silently.
+        known_paths = set(self.sandboxes)
+        for path in self.sandbox_digests:
+            if path not in known_paths:
+                logger.warning("Have digest for unknown sandbox %s", path)
+
+        # Walk the sandboxes in order so the result lines up index by index
+        # (this also copes with the same path being listed more than once).
+        digests: list[str] = []
+        for path in self.sandboxes:
+            digest = self.sandbox_digests.get(path)
+            if digest is None:
+                logger.warning("Sandbox %s was not archived", path)
+                return None
+            digests.append(digest)
+        return digests
+
 
 class CompilationJob(Job):
     """Job representing a compilation.
@@ -274,9 +305,11 @@ def __init__(
         shard: int | None = None,
         keep_sandbox: bool = False,
         sandboxes: list[str] | None = None,
+        sandbox_digests: dict[str, str] | None = None,
         info: str | None = None,
         language: str | None = None,
         multithreaded_sandbox: bool = False,
+        archive_sandbox: bool = False,
         files: dict[str, File] | None = None,
         managers: dict[str, Manager] | None = None,
         success: bool | None = None,
@@ -296,9 +329,9 @@ def __init__(
         """
 
         Job.__init__(self, operation, task_type, task_type_parameters,
-                     language, multithreaded_sandbox,
-                     shard, keep_sandbox, sandboxes, info, success, text,
-                     files, managers, executables)
+                     language, multithreaded_sandbox, archive_sandbox,
+                     shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
+                     text, files, managers, executables)
         self.compilation_success = compilation_success
         self.plus = plus
 
@@ -341,6 +374,7 @@ def from_submission(
             task_type_parameters=dataset.task_type_parameters,
             language=submission.language,
             multithreaded_sandbox=multithreaded,
+            archive_sandbox=operation.archive_sandbox,
             files=dict(submission.files),
             managers=dict(dataset.managers),
             info="compile submission %d" % (submission.id)
@@ -367,7 +401,8 @@ def to_submission(self, sr: SubmissionResult):
             self.plus.get('execution_wall_clock_time')
         sr.compilation_memory = self.plus.get('execution_memory')
         sr.compilation_shard = self.shard
-        sr.compilation_sandbox = ":".join(self.sandboxes)
+        sr.compilation_sandbox_paths = self.sandboxes
+        sr.compilation_sandbox_digests = self.get_sandbox_digest_list()
         for executable in self.executables.values():
             sr.executables.set(executable)
 
@@ -431,6 +466,7 @@ def from_user_test(
             task_type_parameters=dataset.task_type_parameters,
             language=user_test.language,
             multithreaded_sandbox=multithreaded,
+            archive_sandbox=operation.archive_sandbox,
             files=dict(user_test.files),
             managers=managers,
             info="compile user test %d" % (user_test.id)
@@ -457,7 +493,8 @@ def to_user_test(self, ur: UserTestResult):
             self.plus.get('execution_wall_clock_time')
         ur.compilation_memory = self.plus.get('execution_memory')
         ur.compilation_shard = self.shard
-        ur.compilation_sandbox = ":".join(self.sandboxes)
+        ur.compilation_sandbox_paths = self.sandboxes
+        ur.compilation_sandbox_digests = self.get_sandbox_digest_list()
         for executable in self.executables.values():
             u_executable = UserTestExecutable(
                 executable.filename, executable.digest)
@@ -485,9 +522,11 @@ def __init__(
         shard: int | None = None,
         keep_sandbox: bool = False,
         sandboxes: list[str] | None = None,
+        sandbox_digests: dict[str, str] | None = None,
         info: str | None = None,
         language: str | None = None,
         multithreaded_sandbox: bool = False,
+        archive_sandbox: bool = False,
         files: dict[str, File] | None = None,
         managers: dict[str, Manager] | None = None,
         executables: dict[str, Executable] | None = None,
@@ -526,9 +565,9 @@ def __init__(
 
         """
         Job.__init__(self, operation, task_type, task_type_parameters,
-                     language, multithreaded_sandbox,
-                     shard, keep_sandbox, sandboxes, info, success, text,
-                     files, managers, executables)
+                     language, multithreaded_sandbox, archive_sandbox,
+                     shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
+                     text, files, managers, executables)
         self.input = input
         self.output = output
         self.time_limit = time_limit
@@ -592,6 +631,7 @@ def from_submission(
             task_type_parameters=dataset.task_type_parameters,
             language=submission.language,
             multithreaded_sandbox=multithreaded,
+            archive_sandbox=operation.archive_sandbox,
             files=dict(submission.files),
             managers=dict(dataset.managers),
             executables=dict(submission_result.executables),
@@ -619,7 +659,8 @@ def to_submission(self, sr: SubmissionResult):
                 'execution_wall_clock_time'),
             execution_memory=self.plus.get('execution_memory'),
             evaluation_shard=self.shard,
-            evaluation_sandbox=":".join(self.sandboxes),
+            evaluation_sandbox_paths=self.sandboxes,
+            evaluation_sandbox_digests=self.get_sandbox_digest_list(),
             testcase=sr.dataset.testcases[self.operation.testcase_codename])]
 
     @staticmethod
@@ -674,6 +715,7 @@ def from_user_test(
             task_type_parameters=dataset.task_type_parameters,
             language=user_test.language,
             multithreaded_sandbox=multithreaded,
+            archive_sandbox=operation.archive_sandbox,
             files=dict(user_test.files),
             managers=managers,
             executables=dict(user_test_result.executables),
@@ -704,7 +746,8 @@ def to_user_test(self, ur: UserTestResult):
             self.plus.get('execution_wall_clock_time')
         ur.execution_memory = self.plus.get('execution_memory')
         ur.evaluation_shard = self.shard
-        ur.evaluation_sandbox = ":".join(self.sandboxes)
+        ur.evaluation_sandbox_paths = self.sandboxes
+        ur.evaluation_sandbox_digests = self.get_sandbox_digest_list()
         ur.output = self.user_output
 
 
 
@@ -27,6 +27,7 @@
 import stat
 import tempfile
 import time
+import tarfile
 from abc import ABCMeta, abstractmethod
 from functools import wraps, partial
 import typing
@@ -532,10 +533,32 @@ def cleanup(self, delete: bool = False):
 
         delete: if True, also delete get_root_path() and everything it
             contains.
-
         """
         pass
 
+    def archive(self) -> str | None:
+        """Pack the sandbox's root directory into a gzipped tarball.
+
+        The tarball is written to a temporary file created inside
+        self.temp_dir and then stored through the file cacher.
+
+        return: the digest returned by the file cacher for the stored
+            archive, or None if building the tarball raised an exception.
+
+        """
+        logger.info("Archiving sandbox in %s.", self.get_root_path())
+
+        with tempfile.TemporaryFile(dir=self.temp_dir) as archive_fobj:
+            root_path = self.get_root_path()
+            try:
+                # The archive holds a single top-level entry named after
+                # the last path component of the sandbox root.
+                with tarfile.open(fileobj=archive_fobj, mode='w:gz') as tar:
+                    tar.add(root_path, os.path.basename(root_path))
+            except Exception:
+                logger.warning("Failed to archive sandbox", exc_info=True)
+                return None
+
+            # Rewind so the file cacher reads the tarball from its start.
+            archive_fobj.seek(0)
+            description = "Sandbox %s" % self.get_root_path()
+            return self.file_cacher.put_file_from_fobj(
+                archive_fobj, description)
+
 
 class StupidSandbox(SandboxBase):
     """A stupid sandbox implementation. It has very few features and
 
@@ -257,7 +257,7 @@ def _do_compile(self, job, file_cacher):
                 Executable(executable_filename, digest)
 
         # Cleanup.
-        delete_sandbox(sandbox, job.success, job.keep_sandbox)
+        delete_sandbox(sandbox, job)
 
     def compile(self, job, file_cacher):
         """See TaskType.compile."""
@@ -380,7 +380,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
         job.plus = stats
 
         if sandbox is not None:
-            delete_sandbox(sandbox, job.success, job.keep_sandbox)
+            delete_sandbox(sandbox, job)
 
     def evaluate(self, job, file_cacher):
         """See TaskType.evaluate."""
 
@@ -242,7 +242,7 @@ def compile(self, job, file_cacher):
                 Executable(executable_filename, digest)
 
         # Cleanup.
-        delete_sandbox(sandbox, job.success, job.keep_sandbox)
+        delete_sandbox(sandbox, job)
 
     def evaluate(self, job, file_cacher):
         """See TaskType.evaluate."""
@@ -434,9 +434,9 @@ def evaluate(self, job, file_cacher):
         job.text = text
         job.plus = stats_user
 
-        delete_sandbox(sandbox_mgr, job.success, job.keep_sandbox)
+        delete_sandbox(sandbox_mgr, job)
         for s in sandbox_user:
-            delete_sandbox(s, job.success, job.keep_sandbox)
+            delete_sandbox(s, job)
         if job.success and not config.keep_sandbox and not job.keep_sandbox:
             for d in fifo_dir:
                 rmtree(d)
@@ -210,7 +210,7 @@ def compile(self, job, file_cacher):
                 Executable(executable_filename, digest)
 
         # Cleanup
-        delete_sandbox(sandbox, job.success, job.keep_sandbox)
+        delete_sandbox(sandbox, job)
 
     def evaluate(self, job, file_cacher):
         """See TaskType.evaluate."""
@@ -346,5 +346,5 @@ def evaluate(self, job, file_cacher):
         job.text = text
         job.plus = stats
 
-        delete_sandbox(first_sandbox, job.success, job.keep_sandbox)
-        delete_sandbox(second_sandbox, job.success, job.keep_sandbox)
+        delete_sandbox(first_sandbox, job)
+        delete_sandbox(second_sandbox, job)