Skip to content
This repository was archived by the owner on Jun 24, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/playwright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ jobs:
run: pnpm install --frozen-lockfile
- run: pnpx playwright install --with-deps
- uses: nrwl/nx-set-shas@v4

- name: Install playwright
run: pnpm exec playwright install
# Prepend any command with "nx-cloud record --" to record its logs to Nx Cloud
# - run: npx nx-cloud record -- echo Hello World
# Nx Affected runs only tasks affected by the changes in this PR/commit. Learn more: https://nx.dev/ci/features/affected
Expand Down
17 changes: 16 additions & 1 deletion apps/client/src/translations/en/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1297,7 +1297,22 @@
"enable_image_compression": "Enable image compression",
"max_image_dimensions": "Max width / height of an image (image will be resized if it exceeds this setting).",
"max_image_dimensions_unit": "pixels",
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)"
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)",
"ocr_section_title": "Optical Character Recognition (OCR)",
"enable_ocr": "Enable OCR for images",
"ocr_description": "Automatically extract text from images using OCR technology. This makes image content searchable within your notes.",
"ocr_auto_process": "Automatically process new images with OCR",
"ocr_language": "OCR Language",
"ocr_min_confidence": "Minimum confidence threshold",
"ocr_confidence_unit": "(0.0-1.0)",
"ocr_confidence_description": "Only extract text with confidence above this threshold. Lower values include more text but may be less accurate.",
"batch_ocr_title": "Process Existing Images",
"batch_ocr_description": "Process all existing images in your notes with OCR. This may take some time depending on the number of images.",
"batch_ocr_start": "Start Batch OCR Processing",
"batch_ocr_starting": "Starting batch OCR processing...",
"batch_ocr_progress": "Processing {{processed}} of {{total}} images...",
"batch_ocr_completed": "Batch OCR completed! Processed {{processed}} images.",
"batch_ocr_error": "Error during batch OCR: {{error}}"
},
"attachment_erasure_timeout": {
"attachment_erasure_timeout": "Attachment Erasure Timeout",
Expand Down
193 changes: 193 additions & 0 deletions apps/client/src/widgets/type_widgets/options/images/images.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import OptionsWidget from "../options_widget.js";
import { t } from "../../../../services/i18n.js";
import type { OptionMap } from "@triliumnext/commons";
import server from "../../../../services/server.js";
import toastService from "../../../../services/toast.js";

const TPL = /*html*/`
<div class="options-section">
Expand All @@ -9,6 +11,12 @@ const TPL = /*html*/`
opacity: 0.5;
pointer-events: none;
}
.batch-ocr-progress {
margin-top: 10px;
}
.batch-ocr-button {
margin-top: 10px;
}
</style>

<h4>${t("images.images_section_title")}</h4>
Expand Down Expand Up @@ -44,6 +52,70 @@ const TPL = /*html*/`
</label>
</div>
</div>

<hr />

<h5>${t("images.ocr_section_title")}</h5>

<label class="tn-checkbox">
<input class="ocr-enabled" type="checkbox" name="ocr-enabled">
${t("images.enable_ocr")}
</label>

<p class="form-text">${t("images.ocr_description")}</p>

<div class="ocr-settings-wrapper">
<label class="tn-checkbox">
<input class="ocr-auto-process" type="checkbox" name="ocr-auto-process">
${t("images.ocr_auto_process")}
</label>

<div class="form-group">
<label>${t("images.ocr_language")}</label>
<select class="ocr-language form-control">
<option value="eng">English</option>
<option value="spa">Spanish</option>
<option value="fra">French</option>
<option value="deu">German</option>
<option value="ita">Italian</option>
<option value="por">Portuguese</option>
<option value="rus">Russian</option>
<option value="chi_sim">Chinese (Simplified)</option>
<option value="chi_tra">Chinese (Traditional)</option>
<option value="jpn">Japanese</option>
<option value="kor">Korean</option>
<option value="ara">Arabic</option>
<option value="hin">Hindi</option>
<option value="tha">Thai</option>
<option value="vie">Vietnamese</option>
</select>
</div>

<div class="form-group">
<label>${t("images.ocr_min_confidence")}</label>
<label class="input-group tn-number-unit-pair">
<input class="ocr-min-confidence form-control options-number-input" type="number" min="0" max="1" step="0.1">
<span class="input-group-text">${t("images.ocr_confidence_unit")}</span>
</label>
<div class="form-text">${t("images.ocr_confidence_description")}</div>
</div>

<div class="batch-ocr-section">
<h6>${t("images.batch_ocr_title")}</h6>
<p class="form-text">${t("images.batch_ocr_description")}</p>

<button class="btn btn-primary batch-ocr-button">
${t("images.batch_ocr_start")}
</button>

<div class="batch-ocr-progress" style="display: none;">
<div class="progress">
<div class="progress-bar" role="progressbar" style="width: 0%"></div>
</div>
<div class="batch-ocr-status"></div>
</div>
</div>
</div>
</div>
`;

Expand All @@ -55,9 +127,21 @@ export default class ImageOptions extends OptionsWidget {
private $enableImageCompression!: JQuery<HTMLElement>;
private $imageCompressionWrapper!: JQuery<HTMLElement>;

// OCR elements
private $ocrEnabled!: JQuery<HTMLElement>;
private $ocrAutoProcess!: JQuery<HTMLElement>;
private $ocrLanguage!: JQuery<HTMLElement>;
private $ocrMinConfidence!: JQuery<HTMLElement>;
private $ocrSettingsWrapper!: JQuery<HTMLElement>;
private $batchOcrButton!: JQuery<HTMLElement>;
private $batchOcrProgress!: JQuery<HTMLElement>;
private $batchOcrProgressBar!: JQuery<HTMLElement>;
private $batchOcrStatus!: JQuery<HTMLElement>;

doRender() {
this.$widget = $(TPL);

// Image settings
this.$imageMaxWidthHeight = this.$widget.find(".image-max-width-height");
this.$imageJpegQuality = this.$widget.find(".image-jpeg-quality");

Expand All @@ -76,16 +160,48 @@ export default class ImageOptions extends OptionsWidget {
this.updateCheckboxOption("compressImages", this.$enableImageCompression);
this.setImageCompression();
});

// OCR settings
this.$ocrEnabled = this.$widget.find(".ocr-enabled");
this.$ocrAutoProcess = this.$widget.find(".ocr-auto-process");
this.$ocrLanguage = this.$widget.find(".ocr-language");
this.$ocrMinConfidence = this.$widget.find(".ocr-min-confidence");
this.$ocrSettingsWrapper = this.$widget.find(".ocr-settings-wrapper");
this.$batchOcrButton = this.$widget.find(".batch-ocr-button");
this.$batchOcrProgress = this.$widget.find(".batch-ocr-progress");
this.$batchOcrProgressBar = this.$widget.find(".progress-bar");
this.$batchOcrStatus = this.$widget.find(".batch-ocr-status");

this.$ocrEnabled.on("change", () => {
this.updateCheckboxOption("ocrEnabled", this.$ocrEnabled);
this.setOcrVisibility();
});

this.$ocrAutoProcess.on("change", () => this.updateCheckboxOption("ocrAutoProcessImages", this.$ocrAutoProcess));

this.$ocrLanguage.on("change", () => this.updateOption("ocrLanguage", this.$ocrLanguage.val()));

this.$ocrMinConfidence.on("change", () => this.updateOption("ocrMinConfidence", String(this.$ocrMinConfidence.val()).trim() || "0.6"));

this.$batchOcrButton.on("click", () => this.startBatchOcr());
}

/**
 * Pushes the loaded option values into the form controls of this widget.
 *
 * Image controls are filled first, then the OCR controls; the dependent
 * "disabled" styling of both sub-sections is refreshed last so it reflects
 * the checkbox states that were just applied.
 *
 * @param options the option map whose values should be shown.
 */
optionsLoaded(options: OptionMap) {
    const {
        imageMaxWidthHeight,
        imageJpegQuality,
        downloadImagesAutomatically,
        compressImages,
        ocrEnabled,
        ocrAutoProcessImages,
        ocrLanguage,
        ocrMinConfidence
    } = options;

    // Plain image options.
    this.$imageMaxWidthHeight.val(imageMaxWidthHeight);
    this.$imageJpegQuality.val(imageJpegQuality);
    this.setCheckboxState(this.$downloadImagesAutomatically, downloadImagesAutomatically);
    this.setCheckboxState(this.$enableImageCompression, compressImages);

    // OCR options; language and confidence fall back to defaults when unset.
    this.setCheckboxState(this.$ocrEnabled, ocrEnabled);
    this.setCheckboxState(this.$ocrAutoProcess, ocrAutoProcessImages);
    this.$ocrLanguage.val(ocrLanguage || "eng");
    this.$ocrMinConfidence.val(ocrMinConfidence || "0.6");

    // Re-evaluate which sub-sections appear enabled.
    this.setImageCompression();
    this.setOcrVisibility();
}

setImageCompression() {
Expand All @@ -95,4 +211,81 @@ export default class ImageOptions extends OptionsWidget {
this.$imageCompressionWrapper.addClass("disabled-field");
}
}

/**
 * Greys out the OCR settings group while the "enable OCR" checkbox is
 * unchecked, and restores it when the checkbox is checked, by toggling the
 * `disabled-field` class on the settings wrapper.
 */
setOcrVisibility() {
    const ocrIsOn = Boolean(this.$ocrEnabled.prop("checked"));

    // Second argument forces the state: true adds the class, false removes it.
    this.$ocrSettingsWrapper.toggleClass("disabled-field", !ocrIsOn);
}

/**
 * Starts batch OCR processing on the server and begins polling for progress.
 *
 * The start button is disabled and the progress area is shown and reset
 * before the request is sent. If the server reports `success`, progress
 * polling is started. Otherwise (or if the request itself fails) the error is
 * logged, shown in the status line and in a toast, and the button is
 * re-enabled so the user can retry.
 */
async startBatchOcr() {
    this.$batchOcrButton.prop("disabled", true);
    this.$batchOcrProgress.show();
    this.$batchOcrProgressBar.css("width", "0%");
    this.$batchOcrStatus.text(t("images.batch_ocr_starting"));

    try {
        const result = await server.post("ocr/batch-process") as {
            success: boolean;
            message?: string;
        };

        if (!result.success) {
            throw new Error(result.message || "Failed to start batch OCR");
        }

        // Fire-and-forget: the poller handles and reports its own errors.
        void this.pollBatchOcrProgress();
    } catch (error: unknown) {
        // A rejection is not guaranteed to be an Error instance; avoid
        // printing "undefined" when it is a string or a plain object.
        const message = error instanceof Error ? error.message : String(error);

        console.error("Error starting batch OCR:", error);
        this.$batchOcrStatus.text(t("images.batch_ocr_error", { error: message }));
        toastService.showError(`Failed to start batch OCR: ${message}`);
        this.$batchOcrButton.prop("disabled", false);
    }
}

/**
 * Fetches the current batch OCR progress once and updates the progress UI.
 *
 * While the server reports `inProgress`, the progress bar and status text are
 * refreshed and another poll is scheduled one second later. Once the batch is
 * no longer in progress, the bar is filled, a completion message is shown in
 * the status line and as a toast, the start button is re-enabled and the
 * progress area is hidden after three seconds.
 *
 * Any failure stops the polling, is logged and reported to the user, and
 * re-enables the start button.
 */
async pollBatchOcrProgress() {
    try {
        const result = await server.get("ocr/batch-progress") as {
            inProgress: boolean;
            total: number;
            processed: number;
        };

        if (result.inProgress) {
            // `total` can be 0 (e.g. before the server has counted the images);
            // dividing would give NaN/Infinity and an invalid CSS width.
            const ratio = result.total > 0 ? result.processed / result.total : 0;
            const progress = Math.min(100, Math.max(0, ratio * 100));

            this.$batchOcrProgressBar.css("width", `${progress}%`);
            this.$batchOcrStatus.text(t("images.batch_ocr_progress", {
                processed: result.processed,
                total: result.total
            }));

            // Continue polling
            setTimeout(() => void this.pollBatchOcrProgress(), 1000);
            return;
        }

        // Batch OCR completed
        const completedMessage = t("images.batch_ocr_completed", {
            processed: result.processed,
            total: result.total
        });

        this.$batchOcrProgressBar.css("width", "100%");
        this.$batchOcrStatus.text(completedMessage);
        this.$batchOcrButton.prop("disabled", false);
        toastService.showMessage(completedMessage);

        // Hide progress after 3 seconds
        setTimeout(() => {
            this.$batchOcrProgress.hide();
        }, 3000);
    } catch (error: unknown) {
        // A rejection is not guaranteed to be an Error instance; avoid
        // printing "undefined" when it is a string or a plain object.
        const message = error instanceof Error ? error.message : String(error);

        console.error("Error polling batch OCR progress:", error);
        this.$batchOcrStatus.text(t("images.batch_ocr_error", { error: message }));
        toastService.showError(`Failed to get batch OCR progress: ${message}`);
        this.$batchOcrButton.prop("disabled", false);
    }
}
}
2 changes: 2 additions & 0 deletions apps/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"@types/stream-throttle": "0.1.4",
"@types/supertest": "6.0.3",
"@types/swagger-ui-express": "4.1.8",
"@types/tesseract.js": "2.0.0",
"@types/tmp": "0.2.6",
"@types/turndown": "5.0.5",
"@types/ws": "8.18.1",
Expand Down Expand Up @@ -102,6 +103,7 @@
"swagger-jsdoc": "6.2.8",
"swagger-ui-express": "5.0.1",
"time2fa": "^1.3.0",
"tesseract.js": "6.0.1",
"tmp": "0.2.3",
"turndown": "7.2.0",
"unescape": "1.0.1",
Expand Down
60 changes: 60 additions & 0 deletions apps/server/src/migrations/migrations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,66 @@

// Migrations should be kept in descending order, so the latest migration is first.
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
// Add OCR results table for storing extracted text from images
{
version: 233,
sql: /*sql*/`\
-- Create OCR results table to store extracted text from images
CREATE TABLE IF NOT EXISTS ocr_results (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entity_id TEXT NOT NULL,
entity_type TEXT NOT NULL DEFAULT 'note',
extracted_text TEXT NOT NULL,
confidence REAL NOT NULL,
language TEXT NOT NULL DEFAULT 'eng',
extracted_at TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE(entity_id, entity_type)
);
Comment on lines +14 to +25
Copy link
Contributor

@eliandoran eliandoran Jun 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of storing the results in a separate table (that is not synced yet), how about simply storing the results in an attachment?

This:

  • Simplifies the table structure
  • Can easily filter for them by having a custom role.
  • Allows additional functionality such as being able to view the attachment in order to review how well the OCR went.
  • We can use the same blobs structure that is shared with notes and attachments.

The only problem is with image-attachments, since we can't have attachments for attachments.

Maybe we have to think a bit deeper, to see if it can be further reused on things other than OCR. Perhaps one way would be to have "different representations" for blobs, such as their binary data (e.g. images, files), but also a textual representation that can be used not only for images but also OCR, LLM, etc.

Copy link
Member Author

@perfectra1n perfectra1n Jun 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You definitely have the "whole picture" in mind - so do you think it's best to bite the bullet and create some new table/object that can be used/reused for objects (or even a more obscure "object", whatever that may be in the future) such as these, or do we just use something like a "sibling attachment" for now as you suggested?

-- Create indexes for better search performance
CREATE INDEX IF NOT EXISTS idx_ocr_results_entity
ON ocr_results (entity_id, entity_type);
CREATE INDEX IF NOT EXISTS idx_ocr_results_text
ON ocr_results (extracted_text);
CREATE INDEX IF NOT EXISTS idx_ocr_results_confidence
ON ocr_results (confidence);
-- Create full-text search index for extracted text
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's interesting but I suppose this creates additional complexity. We don't yet have full-text search for note content.

So I think we should try to find a solution (doesn't have to be in the same PR) that allows full text search on the entire blobs table.

CREATE VIRTUAL TABLE IF NOT EXISTS ocr_results_fts USING fts5(
entity_id UNINDEXED,
entity_type UNINDEXED,
extracted_text,
content='ocr_results',
content_rowid='id'
);
-- Create triggers to keep FTS table in sync.
-- ocr_results_fts is an external-content FTS5 table (content='ocr_results'),
-- so old index entries must be removed with the special 'delete' command,
-- passing the OLD column values. A plain UPDATE/DELETE on the FTS table would
-- look the old text up in ocr_results, which has already changed by the time
-- an AFTER trigger runs, leaving stale tokens in the index.
CREATE TRIGGER IF NOT EXISTS ocr_results_fts_insert
AFTER INSERT ON ocr_results
BEGIN
INSERT INTO ocr_results_fts(rowid, entity_id, entity_type, extracted_text)
VALUES (new.id, new.entity_id, new.entity_type, new.extracted_text);
END;
CREATE TRIGGER IF NOT EXISTS ocr_results_fts_update
AFTER UPDATE ON ocr_results
BEGIN
-- Drop the index entry built from the previous row values...
INSERT INTO ocr_results_fts(ocr_results_fts, rowid, entity_id, entity_type, extracted_text)
VALUES ('delete', old.id, old.entity_id, old.entity_type, old.extracted_text);
-- ...then index the row again with its new values.
INSERT INTO ocr_results_fts(rowid, entity_id, entity_type, extracted_text)
VALUES (new.id, new.entity_id, new.entity_type, new.extracted_text);
END;
CREATE TRIGGER IF NOT EXISTS ocr_results_fts_delete
AFTER DELETE ON ocr_results
BEGIN
INSERT INTO ocr_results_fts(ocr_results_fts, rowid, entity_id, entity_type, extracted_text)
VALUES ('delete', old.id, old.entity_id, old.entity_type, old.extracted_text);
END;
`
},
// Remove embedding tables since LLM embedding functionality has been removed
{
version: 232,
Expand Down
Loading