TU-Wien-dataLAB
diff --git a/‎docs/source/admin/system_architecture.md‎
Lines changed: 1 addition & 2 deletions b/‎docs/source/admin/system_architecture.md‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎grader_service/autograding/celery/tasks.py‎
Lines changed: 2 additions & 2 deletions b/‎grader_service/autograding/celery/tasks.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎grader_service/autograding/git_manager.py‎
Lines changed: 162 additions & 0 deletions b/‎grader_service/autograding/git_manager.py‎
Lines changed: 162 additions & 0 deletions
diff --git a/‎grader_service/autograding/kube/kube_grader.py‎
Lines changed: 11 additions & 16 deletions b/‎grader_service/autograding/kube/kube_grader.py‎
Lines changed: 11 additions & 16 deletions
@@ -114,11 +114,10 @@ and handle the evaluation of user submissions, such as running notebooks for gra
 Auto-grading is accomplished using the `grader_service.convert` submodule, which can be executed as a command-line interface (CLI). 
 Different executors are available to manage this:
 - **LocalAutogradeExecutor**: Executes the module directly within the current Python process on the worker by importing the package and invoking the converters.
-- **LocalProcessAutogradeExecutor**: Runs the submodule in a separate process.
+- **LocalAutogradeProcessExecutor**: Runs the submodule in a separate process.
 - **KubeAutogradeExecutor**: Spawns a Kubernetes pod to run the submodule. This is the only approach that allows different images for lectures, as the grading code must be executed in the same environment as the lecture.
 
 
 # How To Scale
 
 [//]: # (TODO: what is the minimal setup? what is the most sophisticated setup)
-
@@ -5,7 +5,7 @@
 from tornado.web import HTTPError
 
 from grader_service.autograding.celery.app import CeleryApp
-from grader_service.autograding.local_feedback import GenerateFeedbackExecutor
+from grader_service.autograding.local_feedback import LocalFeedbackExecutor
 from grader_service.handlers.base_handler import RequestHandlerConfig
 from grader_service.orm.submission import FeedbackStatus, Submission
 from grader_service.plugins.lti import LTISyncGrades
@@ -78,7 +78,7 @@ def generate_feedback_task(self: GraderTask, lecture_id: int, assignment_id: int
             f"invalid submission {submission.id}: {assignment_id=:}, {lecture_id=:} does not match"
         )
 
-    executor = GenerateFeedbackExecutor(grader_service_dir, submission, config=self.celery.config)
+    executor = LocalFeedbackExecutor(grader_service_dir, submission, config=self.celery.config)
     executor.start()
     if submission.feedback_status == FeedbackStatus.GENERATED:
         self.log.info("Successfully generated feedback for submission %s!", submission.id)
 
@@ -0,0 +1,162 @@
+import os
+import subprocess
+from typing import Any, List, Optional
+
+from traitlets import Unicode, validate
+from traitlets.config import LoggingConfigurable
+
+from grader_service.autograding.utils import executable_validator
+from grader_service.handlers.handler_utils import GitRepoType
+from grader_service.orm import Assignment, Lecture, Submission
+
+
+class GitSubmissionManager(LoggingConfigurable):
+    """
+    Handles git-related operations performed by autograder executors:
+    pulling from an input repo, and committing and pushing to the output repo.
+    """
+
+    git_executable = Unicode("git", allow_none=False).tag(config=True)
+    input_repo_type = GitRepoType.USER
+    output_repo_type = GitRepoType.AUTOGRADE
+
+    def __init__(self, grader_service_dir: str, submission: Submission, **kwargs: Any):
+        super().__init__(**kwargs)
+        self.grader_service_dir = grader_service_dir
+        self.submission = submission
+
+        if self.input_repo_type == GitRepoType.USER and self.submission.edited:
+            # User's submission was edited by the instructor - repo type has to be adjusted
+            self.input_repo_type = GitRepoType.EDIT
+
+        self.input_branch = "main"
+        self.output_branch = f"submission_{self.submission.commit_hash}"
+
+    def _get_repo_path(self, repo_type: GitRepoType) -> str:
+        """Determines the Git repository path for the submission."""
+        assignment: Assignment = self.submission.assignment
+        lecture: Lecture = assignment.lecture
+        repo_name = self.submission.user.name
+
+        base_repo_path = os.path.join(
+            self.grader_service_dir, "git", lecture.code, str(assignment.id), repo_type
+        )
+        if repo_type in [GitRepoType.AUTOGRADE, GitRepoType.FEEDBACK]:
+            path = os.path.join(base_repo_path, "user", repo_name)
+        elif repo_type == GitRepoType.EDIT:
+            path = os.path.join(base_repo_path, str(self.submission.id))
+        elif repo_type == GitRepoType.USER:
+            path = os.path.join(base_repo_path, repo_name)
+        else:
+            raise ValueError(f"Cannot determine repo path for repo type {repo_type}")
+
+        path = os.path.normpath(path)
+
+        if not path.startswith(self.grader_service_dir):
+            self.log.error(
+                f"Invalid repo path: {path}. Possibly suspicious values: "
+                f"lecture code: '{lecture.code}' or user name: '{repo_name}'"
+            )
+            raise PermissionError("Invalid repository path.")
+
+        return path
+
+    def pull_submission(self, input_path: str) -> None:
+        """Inits and pulls the submission repository into the input path.
+
+        :param input_path: The directory where the input repo will be created.
+        """
+        input_repo_path = self._get_repo_path(self.input_repo_type)
+
+        self.log.info(f"Pulling repo {input_repo_path} into input directory")
+        commands = [
+            [self.git_executable, "init"],
+            [self.git_executable, "pull", input_repo_path, self.input_branch],
+        ]
+
+        # When autograding a user's submission, check out to the commit of submission
+        if self.input_repo_type == GitRepoType.USER:
+            commands.append([self.git_executable, "checkout", self.submission.commit_hash])
+
+        for cmd in commands:
+            self._run_git(cmd, input_path)
+
+        self.log.info("Successfully cloned repo.")
+
+    def _set_up_output_repo(self, output_path: str) -> None:
+        """Initializes the output repo and switches to a separate branch named
+        after the commit hash of the submission."""
+        output_repo_path = self._get_repo_path(self.output_repo_type)
+
+        if not os.path.exists(output_repo_path):
+            os.makedirs(output_repo_path)
+            self._run_git([self.git_executable, "init", "--bare", output_repo_path], output_path)
+
+        self.log.info(f"Initialising repo at {output_path}")
+        self._run_git([self.git_executable, "init"], output_path)
+        self.log.info(f"Creating the new branch {self.output_branch} and switching to it")
+        command = [self.git_executable, "switch", "-c", self.output_branch]
+        self._run_git(command, output_path)
+        self.log.info(f"Now at branch {self.output_branch}")
+
+    def _commit_files(self, filenames: List[str], output_path: str) -> None:
+        """
+        Commits the provided files in the repo at `output_path`.
+        """
+        self.log.info(f"Committing files in {output_path}")
+
+        if not filenames:
+            self.log.info("No files to commit.")
+            return
+
+        self._run_git([self.git_executable, "add", "--", *filenames], output_path)
+        self._run_git(
+            [self.git_executable, "commit", "-m", self.submission.commit_hash], output_path
+        )
+
+    def push_results(self, filenames: List[str], output_path: str) -> None:
+        """Creates the output repository, commits and pushes the changes."""
+        self._set_up_output_repo(output_path)
+        self._commit_files(filenames, output_path)
+
+        output_repo_path = self._get_repo_path(self.output_repo_type)
+
+        self.log.info(f"Pushing to {output_repo_path} at branch {self.output_branch}")
+        self._run_git(
+            [self.git_executable, "push", "-uf", output_repo_path, self.output_branch], output_path
+        )
+        self.log.info("Pushing complete")
+
+    def _run_git(self, command: list[str], cwd: Optional[str]) -> None:
+        """
+        Execute a git command as a subprocess.
+
+        Args:
+            command: The git command to execute, as a list of strings.
+            cwd: The working directory the subprocess should run in.
+        Raises:
+            `subprocess.CalledProcessError`: if `subprocess.run` fails.
+            Any other exception thrown while running the subprocess is logged and also re-raised.
+        """
+        assert command[0] == self.git_executable, f"Not a git command: {command}"
+        self.log.debug('Running "%s"', " ".join(command))
+        try:
+            subprocess.run(
+                command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                cwd=cwd,
+                text=True,  # Decodes output to string
+                check=True,  # Raises a CalledProcessError on non-zero exit code
+            )
+        except subprocess.CalledProcessError as e:
+            self.log.error(e.stderr)
+            raise
+        except Exception as e:
+            self.log.error(e)
+            raise
+
+    # TODO: Can I decorate executable_validator with both "git_executable" and "convert_executable"?
+    @validate("git_executable")
+    def _validate_executable(self, proposal):
+        return executable_validator(proposal)
@@ -7,7 +7,6 @@
 import asyncio
 import inspect
 import json
-import os
 import re
 import time
 from asyncio import Task, run
@@ -20,7 +19,6 @@
 
 from grader_service.autograding.kube.util import get_current_namespace, make_pod
 from grader_service.autograding.local_grader import LocalAutogradeExecutor
-from grader_service.autograding.utils import rmtree
 from grader_service.orm import Assignment, Lecture, Submission
 from grader_service.orm.assignment import json_serial
 
@@ -282,7 +280,7 @@ def __init__(self, grader_service_dir: str, submission: Submission, **kwargs):
             self.log.info(f"Setting Namespace for submission {self.submission.id}")
             self.namespace = get_current_namespace()
 
-    def get_image(self) -> str:
+    def _get_image(self) -> str:
         """
         Returns the image name based on the lecture and assignment.
         If an image config file exists and has
@@ -312,7 +310,7 @@ def get_image(self) -> str:
             else:
                 return self.resolve_image_name(self.lecture, self.assignment)
 
-    def get_autograde_pod_name(self) -> str:
+    def _get_autograde_pod_name(self) -> str:
         # sanitize username by converting to lowercase and replacing non-alphanumeric chars
         sanitized_username = re.sub(r"[^a-zA-Z0-9]+", "-", self.submission.user.name.lower())
 
@@ -325,7 +323,7 @@ def get_autograde_pod_name(self) -> str:
 
         return f"autograde-job-{sanitized_username}-{self.submission.id}"
 
-    def create_env(self) -> list[V1EnvVar]:
+    def _create_env(self) -> list[V1EnvVar]:
         env = [
             V1EnvVar(
                 name="ASSIGNMENT_SETTINGS",
@@ -334,7 +332,7 @@ def create_env(self) -> list[V1EnvVar]:
         ]
         return env
 
-    def start_pod(self) -> GraderPod:
+    def _start_pod(self) -> GraderPod:
         """
         Starts a pod in the namespace
         with the commit hash as the name of the pod.
@@ -369,14 +367,14 @@ def start_pod(self) -> GraderPod:
         ]
         volume_mounts = volume_mounts + self.extra_volume_mounts
 
-        env = self.create_env()
+        env = self._create_env()
 
         # create pod spec
         pod = make_pod(
-            name=self.get_autograde_pod_name(),
+            name=self._get_autograde_pod_name(),
             cmd=command,
             env=env,
-            image=self.get_image(),
+            image=self._get_image(),
             image_pull_policy=self.image_pull_policy,
             image_pull_secrets=self.image_pull_secrets,
             working_dir="/",
@@ -406,13 +404,9 @@ def _run(self):
         input and output directory through a persistent volume claim.
         :return: Coroutine
         """
-        if os.path.exists(self.output_path):
-            rmtree(self.output_path)
-        os.makedirs(self.output_path, exist_ok=True)
-        self._write_gradebook(self._put_grades_in_assignment_properties())
         grader_pod = None
         try:
-            grader_pod = self.start_pod()
+            grader_pod = self._start_pod()
             self.log.info(f"Started pod {grader_pod.name} in namespace {grader_pod.namespace}")
             status = grader_pod.poll()
             self.grading_logs = self._get_pod_logs(grader_pod)
@@ -429,8 +423,9 @@ def _run(self):
             error_message = json.loads(e.body)
             if error_message["reason"] != "AlreadyExists" and grader_pod is not None:
                 try:
-                    namespace = grader_pod.namespace
-                    self.client.delete_namespaced_pod(name=grader_pod.name, namespace=namespace)
+                    self.client.delete_namespaced_pod(
+                        name=grader_pod.name, namespace=grader_pod.namespace
+                    )
                 except ApiException:
                     pass
             self.log.error(f"{error_message['reason']}: {error_message['message']}")