UniversityOfHelsinkiCS
diff --git a/dalai/prompts.ts b/dalai/prompts.ts
Lines changed: 0 additions & 24 deletions
diff --git a/dalai/worker.ts b/dalai/worker.ts
Lines changed: 14 additions & 3 deletions
@@ -10,7 +10,6 @@ import { promisify } from 'node:util'
 import pdfToText from 'pdf-parse-fork'
 import { pdfToPng, type PngPageOutput } from 'pdf-to-png-converter'
 import logger from './logger.ts'
-import { MARKDOWN_PROMPT, TRANSCRIPTION_PROMPT_2 } from './prompts.ts'
 
 dotenv.config()
 
@@ -304,7 +303,13 @@ const worker = new Worker(
               headers: { 'Content-Type': 'application/json' },
               body: JSON.stringify({
                 model: 'qwen2.5vl:7b',
-                system: TRANSCRIPTION_PROMPT_2,
+                system: `Your task is to transcribe the content of a PDF page given to you as an image.
+                  If the given PDF page contains an image, or a diagram, describe it in detail.
+                  Enclose the description in an **image** tag. For example: **image** This is an image of a cat. **image**.
+                  You are also given the text extracted from the PDF using a PDF parser.
+                  Your task is to combine these two sources of information to produce the most accurate transcription possible.
+                  When there are discrepancies between the image transcription and the PDF text, prioritize the parsed PDF text.
+                  But you are always obligated to keep the **image** tags intact.`,
                 prompt: `Parsed PDF text:\n${pdfText}\n\nImage transcription:`,
                 stream: false,
                 images: [image.toString('base64')],
@@ -335,7 +340,13 @@ const worker = new Worker(
               },
               body: JSON.stringify({
                 model: 'qwen2.5vl:7b',
-                system: MARKDOWN_PROMPT,
+                system: `Your task is to accurately extract and combine text from image transcription and PDF sources into Markdown.
+                  You are given text containing both the transcription text and PDF text.
+                  When there are discrepancies between the transcription text and the PDF text, prioritize the PDF text!
+                  Transcription can contain errors, PDF is the source of truth! If the texts are similar, merge them to create a comprehensive version.
+                  Ensure the final output is well-structured Markdown and free of errors. Do not output anything else than Markdown.
+                  Do not surround the output with a Markdown code block! Use headings, lists, bold, italics, tables etc. where appropriate.
+                  Remember, you are always obligated to keep the **image** tags and the content inside them intact.`,
                 prompt: `Transcription:\n${transcription}\n\nPDF:\n${pdfText}\n\nCombined Markdown:`,
                 stream: false,
               }),