Submitty
diff --git a/‎migration/migrator/data/course_tables.sql‎
Lines changed: 62 additions & 0 deletions b/‎migration/migrator/data/course_tables.sql‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎migration/migrator/migrations/course/20250312145730_add_redactions.py‎
Lines changed: 43 additions & 0 deletions b/‎migration/migrator/migrations/course/20250312145730_add_redactions.py‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎more_autograding_examples/bulk_upload_pdfs/submissions/redactions.json‎
Lines changed: 9 additions & 0 deletions b/‎more_autograding_examples/bulk_upload_pdfs/submissions/redactions.json‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎sbin/submitty_daemon_jobs/submitty_jobs/bulk_qr_split.py‎
Lines changed: 0 additions & 4 deletions b/‎sbin/submitty_daemon_jobs/submitty_jobs/bulk_qr_split.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎sbin/submitty_daemon_jobs/submitty_jobs/bulk_upload_split.py‎
Lines changed: 0 additions & 2 deletions b/‎sbin/submitty_daemon_jobs/submitty_jobs/bulk_upload_split.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎sbin/submitty_daemon_jobs/submitty_jobs/generate_pdf_images.py‎
Lines changed: 39 additions & 13 deletions b/‎sbin/submitty_daemon_jobs/submitty_jobs/generate_pdf_images.py‎
Lines changed: 39 additions & 13 deletions
diff --git a/‎sbin/submitty_daemon_jobs/submitty_jobs/jobs.py‎
Lines changed: 24 additions & 3 deletions b/‎sbin/submitty_daemon_jobs/submitty_jobs/jobs.py‎
Lines changed: 24 additions & 3 deletions
diff --git a/‎sbin/submitty_daemon_jobs/submitty_jobs/regenerate_bulk_images.py‎
Lines changed: 35 additions & 0 deletions b/‎sbin/submitty_daemon_jobs/submitty_jobs/regenerate_bulk_images.py‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎sbin/submitty_daemon_jobs/tests/test_bulk_pdf_split.py‎
Lines changed: 0 additions & 10 deletions b/‎sbin/submitty_daemon_jobs/tests/test_bulk_pdf_split.py‎
Lines changed: 0 additions & 10 deletions
@@ -1388,6 +1388,45 @@ CREATE SEQUENCE public.gradeable_data_overall_comment_goc_id_seq
 ALTER SEQUENCE public.gradeable_data_overall_comment_goc_id_seq OWNED BY public.gradeable_data_overall_comment.goc_id;
 
 
+--
+-- Name: gradeable_redaction; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.gradeable_redaction (
+    redaction_id integer NOT NULL,
+    g_id character varying(255) NOT NULL,
+    page integer NOT NULL,
+    x1 double precision NOT NULL,
+    x2 double precision NOT NULL,
+    y1 double precision NOT NULL,
+    y2 double precision NOT NULL,
+    CONSTRAINT x1_positive CHECK (((x1 >= (0)::double precision) AND (x1 <= x2))),
+    CONSTRAINT x2_positive CHECK (((x2 >= (0)::double precision) AND (x2 <= (1)::double precision))),
+    CONSTRAINT y1_positive CHECK (((y1 >= (0)::double precision) AND (y1 <= y2))),
+    CONSTRAINT y2_positive CHECK (((y2 >= (0)::double precision) AND (y2 <= (1)::double precision)))
+);
+
+
+--
+-- Name: gradeable_redaction_redaction_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+--
+
+CREATE SEQUENCE public.gradeable_redaction_redaction_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: gradeable_redaction_redaction_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+--
+
+ALTER SEQUENCE public.gradeable_redaction_redaction_id_seq OWNED BY public.gradeable_redaction.redaction_id;
+
+
 --
 -- Name: gradeable_teams; Type: TABLE; Schema: public; Owner: -
 --
@@ -2166,6 +2205,13 @@ ALTER TABLE ONLY public.gradeable_data ALTER COLUMN gd_id SET DEFAULT nextval('p
 ALTER TABLE ONLY public.gradeable_data_overall_comment ALTER COLUMN goc_id SET DEFAULT nextval('public.gradeable_data_overall_comment_goc_id_seq'::regclass);
 
 
+--
+-- Name: gradeable_redaction redaction_id; Type: DEFAULT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.gradeable_redaction ALTER COLUMN redaction_id SET DEFAULT nextval('public.gradeable_redaction_redaction_id_seq'::regclass);
+
+
 --
 -- Name: lichen id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -2499,6 +2545,14 @@ ALTER TABLE ONLY public.gradeable
     ADD CONSTRAINT gradeable_pkey PRIMARY KEY (g_id);
 
 
+--
+-- Name: gradeable_redaction gradeable_redaction_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.gradeable_redaction
+    ADD CONSTRAINT gradeable_redaction_pkey PRIMARY KEY (redaction_id);
+
+
 --
 -- Name: grade_inquiries gradeable_team_gc_id; Type: CONSTRAINT; Schema: public; Owner: -
 --
@@ -3398,6 +3452,14 @@ ALTER TABLE ONLY public.gradeable_data_overall_comment
     ADD CONSTRAINT gradeable_data_overall_comment_goc_user_id_fkey FOREIGN KEY (goc_user_id) REFERENCES public.users(user_id) ON DELETE CASCADE;
 
 
+--
+-- Name: gradeable_redaction gradeable_redaction_g_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.gradeable_redaction
+    ADD CONSTRAINT gradeable_redaction_g_id_fkey FOREIGN KEY (g_id) REFERENCES public.gradeable(g_id) ON DELETE CASCADE;
+
+
 --
 -- Name: gradeable_teams gradeable_teams_g_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: -
 --
 
@@ -0,0 +1,43 @@
+"""Migration for a given Submitty course database."""
+
+
+def up(config, database, semester, course):
+    """
+    Run up migration.
+
+    Creates the ``gradeable_redaction`` table if it does not already exist.
+    Each row references one gradeable through ``g_id`` (rows are removed with
+    the gradeable via ``ON DELETE CASCADE``) and stores a page number plus a
+    rectangle whose corners must satisfy ``0 <= x1 <= x2 <= 1`` and
+    ``0 <= y1 <= y2 <= 1``.
+
+    ``config``, ``semester`` and ``course`` are not used by this migration.
+
+    :param config: Object holding configuration details about Submitty
+    :type config: migrator.config.Config
+    :param database: Object for interacting with given database for environment
+    :type database: migrator.db.Database
+    :param semester: Semester of the course being migrated
+    :type semester: str
+    :param course: Code of course being migrated
+    :type course: str
+    """
+    # IF NOT EXISTS keeps the statement from failing when the table is present
+    create_redaction_table = """
+                     CREATE TABLE IF NOT EXISTS gradeable_redaction (
+                        redaction_id SERIAL PRIMARY KEY,
+                        g_id character varying(255) NOT NULL REFERENCES gradeable(g_id) ON DELETE CASCADE,
+                        page integer NOT NULL,
+                        x1 float NOT NULL CONSTRAINT x1_positive CHECK (x1 >= 0 AND x1 <= x2),
+                        x2 float NOT NULL CONSTRAINT x2_positive CHECK (x2 >= 0 AND x2 <= 1),
+                        y1 float NOT NULL CONSTRAINT y1_positive CHECK (y1 >= 0 AND y1 <= y2),
+                        y2 float NOT NULL CONSTRAINT y2_positive CHECK (y2 >= 0 AND y2 <= 1)
+                     )
+    """
+    database.execute(create_redaction_table)
+
+
+def down(config, database, semester, course):
+    """
+    Run down migration (rollback).
+
+    This is a no-op: nothing is executed against the database, so the
+    ``gradeable_redaction`` table is left in place on rollback.
+
+    :param config: Object holding configuration details about Submitty
+    :type config: migrator.config.Config
+    :param database: Object for interacting with given database for environment
+    :type database: migrator.db.Database
+    :param semester: Semester of the course being migrated
+    :type semester: str
+    :param course: Code of course being migrated
+    :type course: str
+    """
+    return None
@@ -0,0 +1,9 @@
+[
+  {
+    "page": 2,
+    "x1": 0,
+    "y1": 0,
+    "x2": 0.3,
+    "y2": 0.3
+  }
+]
@@ -7,7 +7,6 @@
 import numpy
 from . import write_to_log as logger
 from . import submitty_ocr as scanner
-from . import generate_pdf_images
 
 # try importing required modules
 try:
@@ -103,14 +102,12 @@ def main(args):
                     logger.write_to_json(json_file, output)
                     with open(prev_file, 'wb') as out:
                         pdf_writer.write(out)
-                    generate_pdf_images.main(prev_file, [])
 
                 if id_index == 1:
                     # correct first pdf's page count and print file
                     output[prev_file]['page_count'] = page_count
                     with open(prev_file, 'wb') as out:
                         pdf_writer.write(out)
-                    generate_pdf_images.main(prev_file, [])
 
                 # start a new pdf and grab the cover
                 cover_writer = PdfWriter()
@@ -170,7 +167,6 @@ def main(args):
 
         with open(prev_file, 'wb') as out:
             pdf_writer.write(out)
-        generate_pdf_images.main(prev_file, [])
         # write the buffer to the log file, so everything is on one line
         logger.write_to_log(log_file_path, buff)
     except Exception:
 
@@ -7,7 +7,6 @@
 import traceback
 from PyPDF2 import PdfWriter
 from . import write_to_log as logger
-from . import generate_pdf_images
 
 try:
     from pdf2image import convert_from_bytes
@@ -62,7 +61,6 @@ def main(args):
                     i += 1
                 with open(output_filename, 'wb') as out:
                     pdf_writer.write(out)
-                generate_pdf_images.main(output_filename, [])
 
                 with open(cover_filename, 'wb') as out:
                     cover_writer.write(out)
 
@@ -3,7 +3,7 @@
 from typing import List, Sequence
 
 from pdf2image import convert_from_bytes
-from PIL import Image, ImageDraw
+from PIL import ImageDraw
 from PyPDF2 import PdfReader
 
 
@@ -13,28 +13,54 @@ def __init__(self, page_number: int, coordinates: Sequence[float]):
         self.coordinates = coordinates
 
 
-def main(pdf_file_path: str, redactions: List[Redaction]):
+def main(pdf_file_path: str, output_dir: str, redactions: List[Redaction]):
     directory = os.path.dirname(pdf_file_path)
     if directory:
         os.chdir(os.path.dirname(pdf_file_path))
+    # Ensure the output directory exists
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
     try:
         pdfPages = PdfReader(pdf_file_path, strict=False)
-        with open(pdf_file_path, 'rb') as open_file:
+        with open(pdf_file_path, "rb") as open_file:
             imagePages = convert_from_bytes(
                 open_file.read(),
-                )
+            )
+        # Loop through each page in the PDF and save it as an image
         for page_number in range(len(pdfPages.pages)):
-            image_filename = pdf_file_path[:-4] + '_' + str(page_number + 1).zfill(3) + '.jpg'
-            imagePages[page_number].save(image_filename,
-                                         "JPEG", quality=20, optimize=True)
+            image_filename = os.path.join(
+                output_dir,
+                "."
+                + os.path.basename(pdf_file_path[:-4])
+                + "_page_"
+                + str(page_number + 1).zfill(2)
+                + ".jpg",
+            )
+            img = imagePages[page_number]
+            draw = ImageDraw.Draw(img)
             for redaction in redactions:
-                if redaction.page_number != page_number:
+                # Add 1 to page_number because redactions are 1-indexed
+                # and page_number is 0-indexed
+                if redaction.page_number != page_number + 1:
                     continue
-                img = Image.open(image_filename)
-                draw = ImageDraw.Draw(img)
-                draw.rectangle(redaction.coordinates, fill="black")
-                img.save(image_filename,
-                         "JPEG", quality=20, optimize=True)
+                square_size = 25
+
+                # Convert coordinates from relative to absolute pixel values
+                x0 = int(redaction.coordinates[0] * img.size[0])
+                y0 = int(redaction.coordinates[1] * img.size[1])
+                x1 = int(redaction.coordinates[2] * img.size[0])
+                y1 = int(redaction.coordinates[3] * img.size[1])
+
+                # Create a grid of black and grey squares within the redaction area
+                # Loops ensure that the checkered pattern is created
+                for y in range(y0, y1, square_size):
+                    for x in range(x0, x1, square_size):
+                        fill_color = "black" if ((x // square_size + y // square_size) % 2 == 0) else "grey"
+                        draw.rectangle(
+                            [x, y, x + square_size, y + square_size], fill=fill_color
+                        )
+            print(f"Saving image {image_filename}")
+            img.save(image_filename, "JPEG", quality=20, optimize=True)
     except Exception:
         msg = "Failed when splitting pdf " + pdf_file_path
         print(msg)
 
@@ -16,6 +16,8 @@
 import requests
 from urllib.parse import unquote
 from tempfile import TemporaryDirectory
+
+from . import regenerate_bulk_images
 from . import bulk_qr_split
 from . import bulk_upload_split
 from . import generate_pdf_images
@@ -351,10 +353,15 @@ def run_job(self):
 
 class GeneratePdfImages(AbstractJob):
     def run_job(self):
-        pdf_file_path = self.job_details['pdf_file_path']
+        pdf_file_path = self.job_details["pdf_file_path"]
+        output_dir = self.job_details["output_dir"]
         # optionally get redactions
-        redactions = self.job_details.get('redactions', [])
-        generate_pdf_images.main(pdf_file_path, [generate_pdf_images.Redaction(**r) for r in redactions])
+        redactions = self.job_details.get("redactions", [])
+        generate_pdf_images.main(
+            pdf_file_path,
+            output_dir,
+            [generate_pdf_images.Redaction(**r) for r in redactions],
+        )
 
     def cleanup_job(self):
         pass
@@ -435,6 +442,20 @@ def cleanup_job(self):
         pass
 
 
+# Used to regenerate images for all submissions in a bulk upload
+class RegenerateBulkImages(AbstractJob):
+    """
+    Job that hands a bulk-upload folder to ``regenerate_bulk_images.main``.
+
+    The folder is read from ``job_details["pdf_file_path"]`` (required) and
+    the redactions from the optional ``job_details["redactions"]`` list.
+    """
+
+    def run_job(self):
+        """Build the Redaction objects and regenerate the images."""
+        bulk_folder = self.job_details["pdf_file_path"]
+        # Each entry is expanded into keyword arguments for Redaction
+        redaction_objects = []
+        for spec in self.job_details.get("redactions", []):
+            redaction_objects.append(generate_pdf_images.Redaction(**spec))
+        regenerate_bulk_images.main(bulk_folder, redaction_objects)
+
+    def cleanup_job(self):
+        """Nothing to clean up for this job."""
+        return None
+
+
 class DocxToPDF(AbstractJob):
     def run_job(self):
         log_dir = os.path.join(DATA_DIR, "logs", "docx_to_pdf")
 
@@ -0,0 +1,35 @@
+import json
+from pathlib import Path
+
+from . import generate_pdf_images
+
+
+# Regenerate images for all submissions in a bulk upload
+def main(folder, redactions):
+    """
+    Regenerate the page images for every submitter found in ``folder``.
+
+    Every sub-directory of ``folder`` is treated as one submitter. The
+    submitter's ``user_assignment_settings.json`` supplies ``active_version``;
+    ``upload.pdf`` inside that version directory is then passed to
+    ``generate_pdf_images.main`` together with ``redactions``. The output
+    directory is the version directory with its ``submissions`` path
+    component renamed to ``submissions_processed``.
+
+    A submitter is skipped, without stopping the remaining ones, when its
+    settings file is missing or unreadable, when no active version is
+    recorded, or when the active version has no ``upload.pdf``.
+
+    :param folder: Directory containing one sub-directory per submitter
+    :type folder: str or os.PathLike
+    :param redactions: Redactions forwarded unchanged to
+        ``generate_pdf_images.main``
+    :type redactions: list
+    """
+    # generate_pdf_images.main changes the working directory, so a relative
+    # folder would stop resolving after the first submitter; anchor it now.
+    folder_path = Path(folder).absolute()
+
+    # loop over all submitters in folder and regenerate their active version
+    for submitter_dir in [d for d in folder_path.iterdir() if d.is_dir()]:
+        # Read user_assignment_settings.json to get the active version
+        settings_path = submitter_dir / "user_assignment_settings.json"
+        try:
+            # Close the file before the (slow) image generation starts
+            with open(settings_path, "r") as f:
+                settings = json.load(f)
+        except (OSError, ValueError) as err:
+            # ValueError covers json.JSONDecodeError and text decoding errors
+            print(f"Skipping {submitter_dir}: cannot read settings ({err})")
+            continue
+
+        if not isinstance(settings, dict):
+            continue
+        active_version = settings.get("active_version", None)
+        if active_version is None:
+            continue
+
+        active_version_path = submitter_dir / str(active_version)
+        pdf_path = active_version_path / "upload.pdf"
+        # Also covers an active version whose directory does not exist
+        if not pdf_path.is_file():
+            continue
+
+        # Rename only whole path components: a plain string replace would
+        # also rewrite ids that merely contain the word "submissions".
+        results_path = Path(*(
+            "submissions_processed" if part == "submissions" else part
+            for part in active_version_path.parts
+        ))
+        # Run the generate_pdf_images job on the active version
+        generate_pdf_images.main(
+            str(pdf_path),
+            str(results_path),
+            redactions,
+        )
@@ -46,11 +46,6 @@ def test_split_pdf(self):
             cover_tgt = Path(file_name + '_' + str(i_idx).zfill(2) + '_cover.pdf')
             self.assertTrue(split_tgt.is_file())
 
-            #verify each page png is being produced
-            for j_idx in range(1,tgt_num_pages+1):
-                page_tgt = Path(file_name + '_' + str(i_idx).zfill(2) + '_' + str(j_idx).zfill(3)  + '.jpg')
-                self.assertTrue(page_tgt.is_file())
-
 
     #Test handling a bad number of given pages to split a pdf gracefully
     def test_bad_split_number(self):
@@ -115,11 +110,6 @@ def test_split_qr(self):
                 cover_tgt = Path(file_name + '_' + str(i_idx).zfill(3) + '_cover.pdf')
                 self.assertTrue(split_tgt.is_file())
 
-                #verify each page png is being produced
-                for j_idx in range(1,tgt_num_pages+1):
-                    page_tgt = Path(file_name + '_' + str(i_idx).zfill(3) + '_' + str(j_idx).zfill(3)  + '.jpg')
-                    self.assertTrue(page_tgt.is_file())
-
 
 
     def test_split_qr_url(self):