Compare commits

...

50 Commits

Author SHA1 Message Date
Elian Doran
6393d2c188 chore(ocr): remove trainneddata artifact 2026-04-01 17:08:15 +03:00
Elian Doran
d9f0a163cf refactor(ocr): use idiomatic status handling 2026-04-01 17:04:36 +03:00
Elian Doran
6534beec14 fix(ocr): errors not properly shown due to lack of convention 2026-04-01 16:58:34 +03:00
Elian Doran
6d050340ee fix(client): server errors don't reject the promise 2026-04-01 16:53:50 +03:00
Elian Doran
0e7f7fa208 chore(ocr): fix type issues & integrate ReadOnlyTextRepresentation 2026-04-01 16:45:38 +03:00
Elian Doran
287be0bd25 chore(scripts): integrate filter-tsc-output from standalone branch 2026-04-01 16:39:54 +03:00
Elian Doran
18cf2ff873 test(ocr): fix type issues 2026-04-01 16:35:45 +03:00
Elian Doran
b626fb448b refactor(ocr): get rid of require imports 2026-04-01 16:30:27 +03:00
Elian Doran
38f6fb5a7f refactor(ocr): rename ocr_last_processed to textExtractionLastProcessed 2026-04-01 16:26:16 +03:00
Elian Doran
5846df7d02 refactor(ocr): rename ocr_text to textRepresentation 2026-04-01 16:14:08 +03:00
Elian Doran
9462d6109c Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabilities 2026-04-01 15:59:05 +03:00
Elian Doran
0d805a01c1 fix(deps): update dependency i18next to v26 (#9224) 2026-04-01 10:58:03 +03:00
copilot-swe-agent[bot]
7f1e4c0969 fix: remove showSupportNotice from i18next init options (removed in v26)
Agent-Logs-Url: https://github.com/TriliumNext/Trilium/sessions/41f772f7-49b7-4905-8b17-cf90165fc736

Co-authored-by: eliandoran <21236836+eliandoran@users.noreply.github.com>
2026-03-31 20:13:27 +00:00
renovate[bot]
e55cd7841f fix(deps): update dependency i18next to v26 2026-03-31 20:03:35 +00:00
Elian Doran
b9cef158d8 Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabilities 2025-07-31 08:25:30 +03:00
Elian Doran
5ec6141369 feat(ocr): filter out text based on confidence 2025-07-26 14:57:12 +03:00
Elian Doran
55ac1e01f2 chore(ocr): improve ocr search result style 2025-07-26 14:15:45 +03:00
Elian Doran
65b58c3668 feat(ocr): auto-process images only if enabled in settings 2025-07-26 14:12:22 +03:00
Elian Doran
2cb4e5e8dc feat(ocr): run the image operation in the background 2025-07-26 14:07:23 +03:00
Elian Doran
72cea245f1 feat(ocr): automatically process images 2025-07-26 14:00:35 +03:00
Elian Doran
08ca86c68a chore(deps): move workspace dependencies to server 2025-07-26 13:48:28 +03:00
Elian Doran
925c9c1e7b feat(ocr): display OCR text only in search results 2025-07-26 12:55:52 +03:00
Elian Doran
6212ea0304 feat(ocr): display OCR text in search results 2025-07-26 12:41:30 +03:00
Elian Doran
f295592134 fix(ocr): search error due to scoring 2025-07-26 12:33:45 +03:00
Elian Doran
69b0973e6d feat(ocr): add a button to trigger an OCR manually 2025-07-26 12:18:20 +03:00
Elian Doran
422d318dac feat(ocr): add an option to display OCR text 2025-07-26 12:08:04 +03:00
Elian Doran
c55aa6ee88 refactor(ocr): unnecessary initialization logic 2025-07-26 11:56:48 +03:00
Elian Doran
090b175152 refactor(ocr): deduplicate mime types partially 2025-07-26 11:51:53 +03:00
Elian Doran
11e9b097a2 feat(ocr): basic processing of new files 2025-07-26 11:46:28 +03:00
Elian Doran
2adfc1d32b chore(ci): remove unnecessary change 2025-07-26 11:24:42 +03:00
Elian Doran
99fa5d89e7 Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabilities 2025-07-26 10:33:01 +03:00
perf3ct
ca8cbf8ccf feat(ocr): add additional processors for OCR feature 2025-07-16 20:10:56 +00:00
perf3ct
6722d2d266 feat(ocr): implement new language selection form 2025-07-16 20:10:41 +00:00
perf3ct
508cbeaa1b feat(ocr): update this new migration to also add a ocr_last_processed column 2025-07-16 20:10:07 +00:00
perf3ct
e040865905 feat(ocr): add officeparser, pdf-parse, and sharp dependencies for ocr 2025-07-16 20:09:41 +00:00
perf3ct
a7878dd2c6 Merge branch 'main' into feat/add-ocr-capabilities 2025-07-16 17:54:32 +00:00
Jon Fuller
02980834ad Merge branch 'main' into feat/add-ocr-capabilities 2025-07-15 10:10:47 -07:00
perf3ct
2a8c8871c4 fix(dev): resolve issues with pnpm-lock.yaml 2025-07-14 16:41:02 +00:00
perf3ct
893be24c1d merge main into feature branch 2025-07-14 16:38:22 +00:00
perf3ct
9029f59410 feat(ocr): swap from custom table to using the blobs table, with a new column 2025-07-14 16:15:15 +00:00
Jon Fuller
4b5e8d33a6 Update playwright.yml 2025-06-10 15:37:05 -07:00
perf3ct
09196c045f fix(ocr): obviously don't need this migration file anymore 2025-06-10 20:59:17 +00:00
perf3ct
7868ebec1e fix(unit): also fix broken llm test 2025-06-10 20:51:34 +00:00
perf3ct
80a9182f05 feat(unit): ocr tests almost pass... 2025-06-10 20:41:40 +00:00
perf3ct
d20b3d854f feat(unit): ocr tests almost pass... 2025-06-10 20:36:52 +00:00
perf3ct
f1356228a3 feat(unit): ocr unit tests almost pass 2025-06-10 20:22:31 +00:00
perf3ct
a4adc51e50 fix(unit): resolve typecheck errors 2025-06-10 19:48:48 +00:00
perf3ct
864543e4f9 feat(ocr): drop confidence down a little bit 2025-06-10 19:22:46 +00:00
perf3ct
33a549202b fix(package): referenced wrong tesseract.js lol 2025-06-10 19:19:17 +00:00
perf3ct
c4a0219b18 feat(ocr): add unit tests, resolve double sent headers, and fix the wonderful tesseract.js path issues 2025-06-10 19:12:50 +00:00
47 changed files with 4221 additions and 150 deletions

View File

@@ -54,7 +54,7 @@
"draggabilly": "3.0.0",
"force-graph": "1.51.2",
"globals": "17.4.0",
"i18next": "25.10.10",
"i18next": "26.0.1",
"i18next-http-backend": "3.0.2",
"jquery": "4.0.0",
"jquery.fancytree": "2.38.5",

View File

@@ -302,6 +302,7 @@ export type CommandMappings = {
ninthTab: CommandData;
lastTab: CommandData;
showNoteSource: CommandData;
showNoteOCRText: CommandData;
showSQLConsole: CommandData;
showBackendLog: CommandData;
showCheatsheet: CommandData;

View File

@@ -148,6 +148,19 @@ export default class RootCommandExecutor extends Component {
}
}
/**
 * Opens the currently active note in a tab using the read-only OCR text
 * view (viewScope.viewMode === "ocr"). No-op when no note is active.
 */
async showNoteOCRTextCommand() {
const notePath = appContext.tabManager.getActiveContextNotePath();
if (notePath) {
await appContext.tabManager.openTabWithNoteWithHoisting(notePath, {
activate: true,
viewScope: {
viewMode: "ocr"
}
});
}
}
async showAttachmentsCommand() {
const notePath = appContext.tabManager.getActiveContextNotePath();

View File

@@ -32,6 +32,7 @@ export interface RenderOptions {
includeArchivedNotes?: boolean;
/** Set of note IDs that have already been seen during rendering to prevent infinite recursion. */
seenNoteIds?: Set<string>;
showTextRepresentation?: boolean;
}
const CODE_MIME_TYPES = new Set(["application/json"]);
@@ -55,7 +56,7 @@ export async function getRenderedContent(this: {} | { ctx: string }, entity: FNo
} else if (type === "code") {
await renderCode(entity, $renderedContent);
} else if (["image", "canvas", "mindMap", "spreadsheet"].includes(type)) {
renderImage(entity, $renderedContent, options);
await renderImage(entity, $renderedContent, options);
} else if (!options.tooltip && ["file", "pdf", "audio", "video"].includes(type)) {
await renderFile(entity, type, $renderedContent);
} else if (type === "mermaid") {
@@ -138,7 +139,7 @@ async function renderCode(note: FNote | FAttachment, $renderedContent: JQuery<HT
await applySingleBlockSyntaxHighlight($codeBlock, normalizeMimeTypeForCKEditor(note.mime));
}
function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: RenderOptions = {}) {
async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: RenderOptions = {}) {
const encodedTitle = encodeURIComponent(entity.title);
let url;
@@ -178,9 +179,39 @@ function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLE
}
imageContextMenuService.setupContextMenu($img);
// Add OCR text display for image notes
if (entity instanceof FNote && options.showTextRepresentation) {
await addOCRTextIfAvailable(entity, $renderedContent);
}
}
async function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>) {
/**
 * Fetches the OCR text for `note` and, when any text exists, appends a
 * styled ".ocr-text-section" block to `$content`.
 *
 * Failures (network errors, non-OK responses) are deliberately silent so
 * rendering keeps working when the OCR API is unavailable; they are only
 * logged at debug level.
 */
async function addOCRTextIfAvailable(note: FNote, $content: JQuery<HTMLElement>) {
    // Shape of the /api/ocr/notes/{noteId}/text payload this function relies on.
    interface OcrTextResponse {
        success: boolean;
        hasOcr: boolean;
        text: string;
    }

    try {
        const response = await fetch(`api/ocr/notes/${note.noteId}/text`);
        if (response.ok) {
            // Type the parsed JSON instead of letting it flow through as `any`.
            const data = await response.json() as OcrTextResponse;
            if (data.success && data.hasOcr && data.text) {
                const $ocrSection = $(`
                    <div class="ocr-text-section">
                        <div class="ocr-header">
                            <span class="bx bx-text"></span> ${t("ocr.extracted_text")}
                        </div>
                        <div class="ocr-content"></div>
                    </div>
                `);
                // .text() escapes the content, so untrusted OCR output cannot inject HTML.
                $ocrSection.find('.ocr-content').text(data.text);
                $content.append($ocrSection);
            }
        }
    } catch (error) {
        // Silently fail if OCR API is not available
        console.debug('Failed to fetch OCR text:', error);
    }
}
async function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>, options: RenderOptions = {}) {
let entityType, entityId;
if (entity instanceof FNote) {
@@ -220,6 +251,11 @@ async function renderFile(entity: FNote | FAttachment, type: string, $renderedCo
$content.append($videoPreview);
}
// Add OCR text display for file notes
if (entity instanceof FNote && options.showTextRepresentation) {
await addOCRTextIfAvailable(entity, $content);
}
if (entityType === "notes" && "noteId" in entity) {
// TODO: we should make this available also for attachments, but there's a problem with "Open externally" support
// in attachment list

View File

@@ -24,8 +24,7 @@ export async function initLocale() {
backend: {
loadPath: `${window.glob.assetPath}/translations/{{lng}}/{{ns}}.json`
},
returnEmptyString: false,
showSupportNotice: false
returnEmptyString: false
});
await setDayjsLocale(locale);

View File

@@ -28,7 +28,7 @@ async function getLinkIcon(noteId: string, viewMode: ViewMode | undefined) {
return icon;
}
export type ViewMode = "default" | "source" | "attachments" | "contextual-help" | "note-map";
export type ViewMode = "default" | "source" | "attachments" | "contextual-help" | "note-map" | "ocr";
export interface ViewScope {
/**

View File

@@ -270,7 +270,11 @@ function ajax(url: string, method: string, data: unknown, headers: Headers, opts
} else if (opts.silentInternalServerError && jqXhr.status === 500) {
// report nothing
} else {
await reportError(method, url, jqXhr.status, jqXhr.responseText);
try {
await reportError(method, url, jqXhr.status, jqXhr.responseText);
} catch {
// reportError may throw (e.g. ValidationError); ensure rej() is still called below.
}
}
rej(jqXhr.responseText);

View File

@@ -2641,3 +2641,26 @@ iframe.print-iframe {
min-height: 50px;
align-items: center;
}
/* Box appended under rendered images/files that contains OCR-extracted text. */
.ocr-text-section {
margin: 10px 0;
padding: 10px;
background: var(--accented-background-color);
border-left: 3px solid var(--main-border-color);
text-align: left;
}

/* Section label ("Extracted Text (OCR)") shown above the text body. */
.ocr-header {
font-weight: bold;
margin-bottom: 8px;
font-size: 0.9em;
color: var(--muted-text-color);
}

/* The OCR text itself: capped height with scrolling, line breaks preserved. */
.ocr-content {
max-height: 150px;
overflow-y: auto;
font-size: 0.9em;
line-height: 1.4;
white-space: pre-wrap;
}

View File

@@ -691,6 +691,7 @@
"search_in_note": "Search in note",
"note_source": "Note source",
"note_attachments": "Note attachments",
"view_ocr_text": "View OCR text",
"open_note_externally": "Open note externally",
"open_note_externally_title": "File will be open in an external application and watched for changes. You'll then be able to upload the modified version back to Trilium.",
"open_note_custom": "Open note custom",
@@ -1259,7 +1260,22 @@
"enable_image_compression": "Enable image compression",
"max_image_dimensions": "Max width / height of an image (image will be resized if it exceeds this setting).",
"max_image_dimensions_unit": "pixels",
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)"
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)",
"ocr_section_title": "Optical Character Recognition (OCR)",
"enable_ocr": "Enable OCR for images",
"ocr_description": "Automatically extract text from images using OCR technology. This makes image content searchable within your notes.",
"ocr_auto_process": "Automatically process new images with OCR",
"ocr_language": "OCR Language",
"ocr_min_confidence": "Minimum confidence threshold",
"ocr_confidence_unit": "(0.0-1.0)",
"ocr_confidence_description": "Only extract text with confidence above this threshold. Lower values include more text but may be less accurate.",
"batch_ocr_title": "Process Existing Images",
"batch_ocr_description": "Process all existing images in your notes with OCR. This may take some time depending on the number of images.",
"batch_ocr_start": "Start Batch OCR Processing",
"batch_ocr_starting": "Starting batch OCR processing...",
"batch_ocr_progress": "Processing {{processed}} of {{total}} images...",
"batch_ocr_completed": "Batch OCR completed! Processed {{processed}} images.",
"batch_ocr_error": "Error during batch OCR: {{error}}"
},
"attachment_erasure_timeout": {
"attachment_erasure_timeout": "Attachment Erasure Timeout",
@@ -2067,6 +2083,20 @@
"calendar_view": {
"delete_note": "Delete note..."
},
"ocr": {
"extracted_text": "Extracted Text (OCR)",
"extracted_text_title": "Extracted Text (OCR)",
"loading_text": "Loading OCR text...",
"no_text_available": "No OCR text available",
"no_text_explanation": "This note has not been processed for OCR text extraction or no text was found.",
"failed_to_load": "Failed to load OCR text",
"extracted_on": "Extracted on: {{date}}",
"unknown_date": "Unknown",
"process_now": "Process OCR",
"processing": "Processing...",
"processing_started": "OCR processing has been started. Please wait a moment and refresh.",
"processing_failed": "Failed to start OCR processing"
},
"command_palette": {
"tree-action-name": "Tree: {{name}}",
"export_note_title": "Export Note",

View File

@@ -336,6 +336,8 @@ export async function getExtendedWidgetType(note: FNote | null | undefined, note
if (noteContext?.viewScope?.viewMode === "source") {
resultingType = "readOnlyCode";
} else if (noteContext.viewScope?.viewMode === "ocr") {
resultingType = "readOnlyOCRText";
} else if (noteContext.viewScope?.viewMode === "attachments") {
resultingType = noteContext.viewScope.attachmentId ? "attachmentDetail" : "attachmentList";
} else if (noteContext.viewScope?.viewMode === "note-map") {

View File

@@ -1,8 +1,9 @@
import { it, describe, expect } from "vitest";
import { buildNote } from "../../../test/easy-froca";
import { getBoardData } from "./data";
import { describe, expect,it } from "vitest";
import FBranch from "../../../entities/fbranch";
import froca from "../../../services/froca";
import { buildNote } from "../../../test/easy-froca";
import { getBoardData } from "./data";
describe("Board data", () => {
it("deduplicates cloned notes", async () => {

View File

@@ -27,6 +27,7 @@ const VIEW_MODE_ICON_MAPPINGS: Record<Exclude<ViewMode, "default">, string> = {
"contextual-help": "bx bx-help-circle",
"note-map": "bx bxs-network-chart",
attachments: "bx bx-paperclip",
ocr: "bx bx-text"
};
export default function TabSwitcher() {

View File

@@ -12,7 +12,7 @@ import { TypeWidgetProps } from "./type_widgets/type_widget";
* A `NoteType` altered by the note detail widget, taking into consideration whether the note is editable or not and adding special note types such as an empty one,
* for protected session or attachment information.
*/
export type ExtendedNoteType = Exclude<NoteType, "launcher" | "text" | "code" | "llmChat"> | "empty" | "readOnlyCode" | "readOnlyText" | "editableText" | "editableCode" | "attachmentDetail" | "attachmentList" | "protectedSession" | "sqlConsole" | "llmChat";
export type ExtendedNoteType = Exclude<NoteType, "launcher" | "text" | "code" | "llmChat"> | "empty" | "readOnlyCode" | "readOnlyText" | "readOnlyOCRText" | "editableText" | "editableCode" | "attachmentDetail" | "attachmentList" | "protectedSession" | "sqlConsole" | "llmChat";
export type TypeWidget = ((props: TypeWidgetProps) => VNode | JSX.Element | undefined);
type NoteTypeView = () => (Promise<{ default: TypeWidget } | TypeWidget> | TypeWidget);
@@ -78,6 +78,11 @@ export const TYPE_MAPPINGS: Record<ExtendedNoteType, NoteTypeMapping> = {
className: "note-detail-readonly-code",
printable: true
},
readOnlyOCRText: {
view: () => import("./type_widgets/ReadOnlyTextRepresentation"),
className: "note-detail-ocr-text",
printable: true
},
editableCode: {
view: async () => (await import("./type_widgets/code/Code")).EditableCode,
className: "note-detail-code",

View File

@@ -162,6 +162,7 @@ export function NoteContextMenu({ note, noteContext, itemsAtStart, itemsNearNote
<CommandItem command="openNoteExternally" icon="bx bx-file-find" disabled={isSearchOrBook || !isElectron} text={t("note_actions.open_note_externally")} title={t("note_actions.open_note_externally_title")} />
<CommandItem command="openNoteCustom" icon="bx bx-customize" disabled={isSearchOrBook || isMac || !isElectron} text={t("note_actions.open_note_custom")} />
<CommandItem command="showNoteSource" icon="bx bx-code" disabled={!hasSource} text={t("note_actions.note_source")} />
<CommandItem command="showNoteOCRText" icon="bx bx-text" disabled={!["image", "file"].includes(noteType)} text={t("note_actions.view_ocr_text")} />
{(syncServerHost && isElectron) &&
<CommandItem command="openNoteOnServer" icon="bx bx-world" disabled={!syncServerHost} text={t("note_actions.open_note_on_server")} />
}

View File

@@ -0,0 +1,145 @@
import { useEffect, useState } from "preact/hooks";
import { t } from "../../services/i18n";
import server from "../../services/server";
import toast from "../../services/toast";
import { TypeWidgetProps } from "./type_widget";
interface TextRepresentationResponse {
success: boolean;
text: string;
hasOcr: boolean;
extractedAt: string | null;
message?: string;
}
type State =
| { kind: "loading" }
| { kind: "loaded"; text: string; extractedAt: string | null }
| { kind: "empty" }
| { kind: "error"; message: string };
/**
 * Read-only viewer for a note's OCR-extracted text ("text representation").
 *
 * Fetches the text from `ocr/notes/{noteId}/text` and renders one of four
 * states: loading, loaded text (with extraction date), empty (offering a
 * button to trigger OCR processing), or error.
 */
export default function ReadOnlyTextRepresentation({ note }: TypeWidgetProps) {
    const [ state, setState ] = useState<State>({ kind: "loading" });
    const [ processing, setProcessing ] = useState(false);

    /** Loads the OCR text for the current note and maps the response onto `State`. */
    async function fetchText() {
        setState({ kind: "loading" });
        try {
            const response = await server.get<TextRepresentationResponse>(`ocr/notes/${note.noteId}/text`);
            if (!response.success) {
                setState({ kind: "error", message: response.message || t("ocr.failed_to_load") });
                return;
            }
            if (!response.hasOcr || !response.text) {
                setState({ kind: "empty" });
                return;
            }
            setState({ kind: "loaded", text: response.text, extractedAt: response.extractedAt });
        } catch (error: unknown) {
            // `unknown` (not `any`) forces narrowing before `.message` is read.
            console.error("Error loading text representation:", error);
            const message = error instanceof Error ? error.message : "";
            setState({ kind: "error", message: message || t("ocr.failed_to_load") });
        }
    }

    // Re-fetch whenever the displayed note changes. `void` marks the promise
    // as intentionally unawaited; fetchText handles its own errors.
    useEffect(() => { void fetchText(); }, [ note.noteId ]);

    /** Starts server-side OCR for this note, then re-fetches the text shortly after. */
    async function processOCR() {
        setProcessing(true);
        try {
            const response = await server.post<{ success: boolean; message?: string }>(`ocr/process-note/${note.noteId}`);
            if (response.success) {
                toast.showMessage(t("ocr.processing_started"));
                // Processing is asynchronous on the server; re-check once after a short delay.
                setTimeout(fetchText, 2000);
            } else {
                toast.showError(response.message || t("ocr.processing_failed"));
            }
        } catch {
            // Server errors (4xx/5xx) are already shown as toasts by server.ts.
        } finally {
            setProcessing(false);
        }
    }

    return (
        <div className="note-detail-printable" style={{ padding: "10px" }}>
            <div style={{
                marginBottom: "10px",
                padding: "8px 12px",
                backgroundColor: "var(--main-background-color)",
                border: "1px solid var(--main-border-color)",
                borderRadius: "4px",
                fontWeight: 500
            }}>
                <span className="bx bx-text" />{" "}{t("ocr.extracted_text_title")}
            </div>

            {state.kind === "loading" && (
                <div style={{ textAlign: "center", padding: "30px", color: "var(--muted-text-color)" }}>
                    <span className="bx bx-loader-alt bx-spin" />{" "}{t("ocr.loading_text")}
                </div>
            )}

            {state.kind === "loaded" && (
                <>
                    <pre style={{
                        whiteSpace: "pre-wrap",
                        fontFamily: "var(--detail-text-font-family)",
                        fontSize: "var(--detail-text-font-size)",
                        lineHeight: 1.6,
                        border: "1px solid var(--main-border-color)",
                        borderRadius: "4px",
                        padding: "15px",
                        backgroundColor: "var(--accented-background-color)",
                        minHeight: "100px"
                    }}>
                        {state.text}
                    </pre>

                    <div style={{ fontSize: "0.9em", color: "var(--muted-text-color)", marginTop: "10px", fontStyle: "italic" }}>
                        {t("ocr.extracted_on", { date: state.extractedAt ? new Date(state.extractedAt).toLocaleString() : t("ocr.unknown_date") })}
                    </div>
                </>
            )}

            {state.kind === "empty" && (
                <>
                    <div style={{ color: "var(--muted-text-color)", fontStyle: "italic", textAlign: "center", padding: "30px" }}>
                        <span className="bx bx-info-circle" />{" "}{t("ocr.no_text_available")}
                    </div>

                    <button
                        type="button"
                        className="btn btn-secondary"
                        style={{ marginTop: "15px" }}
                        disabled={processing}
                        onClick={processOCR}
                    >
                        {processing
                            ? <><span className="bx bx-loader-alt bx-spin" />{" "}{t("ocr.processing")}</>
                            : <><span className="bx bx-play" />{" "}{t("ocr.process_now")}</>
                        }
                    </button>

                    <div style={{ fontSize: "0.9em", color: "var(--muted-text-color)", marginTop: "10px", fontStyle: "italic" }}>
                        {t("ocr.no_text_explanation")}
                    </div>
                </>
            )}

            {state.kind === "error" && (
                <div style={{
                    color: "var(--error-color)",
                    backgroundColor: "var(--error-background-color)",
                    border: "1px solid var(--error-border-color)",
                    padding: "10px",
                    borderRadius: "4px",
                    marginTop: "10px"
                }}>
                    <span className="bx bx-error" />{" "}{state.message}
                </div>
            )}
        </div>
    );
}

View File

@@ -68,6 +68,7 @@
"@types/serve-static": "2.2.0",
"@types/stream-throttle": "0.1.4",
"@types/supertest": "7.2.0",
"@types/tesseract.js": "2.0.0",
"@types/tmp": "0.2.6",
"@types/turndown": "5.0.6",
"@types/ws": "8.18.1",
@@ -115,16 +116,20 @@
"mime-types": "3.0.2",
"multer": "2.1.1",
"normalize-strings": "1.1.1",
"officeparser": "5.2.0",
"pdf-parse": "1.1.1",
"rand-token": "1.0.1",
"safe-compare": "1.1.4",
"sanitize-filename": "1.6.4",
"sanitize-html": "2.17.2",
"sax": "1.6.0",
"serve-favicon": "2.5.1",
"sharp": "0.34.3",
"stream-throttle": "0.1.3",
"strip-bom": "5.0.0",
"striptags": "3.2.0",
"supertest": "7.2.2",
"tesseract.js": "6.0.1",
"swagger-jsdoc": "6.2.8",
"time2fa": "1.4.2",
"tmp": "0.2.5",

View File

@@ -107,6 +107,8 @@ CREATE TABLE IF NOT EXISTS "recent_notes"
CREATE TABLE IF NOT EXISTS "blobs" (
`blobId` TEXT NOT NULL,
`content` TEXT NULL DEFAULT NULL,
`textRepresentation` TEXT DEFAULT NULL,
`textExtractionLastProcessed` TEXT DEFAULT NULL,
`dateModified` TEXT NOT NULL,
`utcDateModified` TEXT NOT NULL,
PRIMARY KEY(`blobId`)

View File

@@ -10,11 +10,12 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
return "blobId";
}
static get hashedProperties() {
return ["blobId", "content"];
return ["blobId", "content", "textRepresentation"];
}
content!: string | Buffer;
contentLength!: number;
textRepresentation?: string | null;
constructor(row: BlobRow) {
super();
@@ -25,6 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
this.blobId = row.blobId;
this.content = row.content;
this.contentLength = row.contentLength;
this.textRepresentation = row.textRepresentation;
this.dateModified = row.dateModified;
this.utcDateModified = row.utcDateModified;
}
@@ -34,6 +36,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
blobId: this.blobId,
content: this.content || null,
contentLength: this.contentLength,
textRepresentation: this.textRepresentation || null,
dateModified: this.dateModified,
utcDateModified: this.utcDateModified
};

View File

@@ -6,6 +6,25 @@
// Migrations should be kept in descending order, so the latest migration is first.
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
// Add text representation column and last processed timestamp to blobs table
{
version: 236,
sql: /*sql*/`\
-- Add text representation column to blobs table
ALTER TABLE blobs ADD COLUMN textRepresentation TEXT DEFAULT NULL;
-- Add OCR last processed timestamp to blobs table
ALTER TABLE blobs ADD COLUMN textExtractionLastProcessed TEXT DEFAULT NULL;
-- Create index for text representation searches
CREATE INDEX IF NOT EXISTS idx_blobs_textRepresentation
ON blobs (textRepresentation);
-- Create index for OCR last processed timestamp
CREATE INDEX IF NOT EXISTS idx_blobs_textExtractionLastProcessed
ON blobs (textExtractionLastProcessed);
`
},
// Add missing database indices for query performance
{
version: 235,

View File

@@ -0,0 +1,75 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import ocrRoutes from "./ocr.js";
// Mock the OCR service
vi.mock("../../services/ocr/ocr_service.js", () => ({
default: {
isOCREnabled: vi.fn(() => true),
startBatchProcessing: vi.fn(() => Promise.resolve({ success: true })),
getBatchProgress: vi.fn(() => ({ inProgress: false, total: 0, processed: 0 }))
}
}));
// Mock becca
vi.mock("../../becca/becca.js", () => ({
default: {}
}));
// Mock log
vi.mock("../../services/log.js", () => ({
default: {
error: vi.fn()
}
}));
// Unit tests for the OCR route handlers' Express-style response plumbing:
// each handler is expected to send a JSON body and mark the response as
// handled via `triliumResponseHandled` so a generic route wrapper skips it.
//
// NOTE(review): these tests call the handlers as (req, res) and assert on
// res.json / res.status, but the handlers in routes/api/ocr.ts are written
// in the return-value style (plain objects / [status, body] tuples) — verify
// this spec still matches the refactored handlers.
describe("OCR API", () => {
let mockRequest: any;
let mockResponse: any;

beforeEach(() => {
// Fresh req/res stubs per test so mock call counts don't leak between cases.
mockRequest = {
params: {},
body: {},
query: {}
};

mockResponse = {
status: vi.fn().mockReturnThis(),
json: vi.fn().mockReturnThis(),
triliumResponseHandled: false
};
});

it("should set triliumResponseHandled flag in batch processing", async () => {
await ocrRoutes.batchProcessOCR(mockRequest, mockResponse);

expect(mockResponse.json).toHaveBeenCalledWith({ success: true });
expect(mockResponse.triliumResponseHandled).toBe(true);
});

it("should set triliumResponseHandled flag in get batch progress", async () => {
await ocrRoutes.getBatchProgress(mockRequest, mockResponse);

expect(mockResponse.json).toHaveBeenCalledWith({
inProgress: false,
total: 0,
processed: 0
});
expect(mockResponse.triliumResponseHandled).toBe(true);
});

it("should handle errors and set triliumResponseHandled flag", async () => {
// Mock service to throw error
const ocrService = await import("../../services/ocr/ocr_service.js");
vi.mocked(ocrService.default.startBatchProcessing).mockRejectedValueOnce(new Error("Test error"));

await ocrRoutes.batchProcessOCR(mockRequest, mockResponse);

expect(mockResponse.status).toHaveBeenCalledWith(500);
expect(mockResponse.json).toHaveBeenCalledWith({
success: false,
error: "Test error"
});
expect(mockResponse.triliumResponseHandled).toBe(true);
});
});

View File

@@ -0,0 +1,324 @@
import type { Request } from "express";
import becca from "../../becca/becca.js";
import ocrService from "../../services/ocr/ocr_service.js";
import sql from "../../services/sql.js";
/**
* @swagger
* /api/ocr/process-note/{noteId}:
* post:
* summary: Process OCR for a specific note
* operationId: ocr-process-note
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* description: ID of the note to process
* requestBody:
* required: false
* content:
* application/json:
* schema:
* type: object
* properties:
* language:
* type: string
* description: OCR language code (e.g. 'eng', 'fra', 'deu')
* default: 'eng'
* forceReprocess:
* type: boolean
* description: Force reprocessing even if OCR already exists
* default: false
* responses:
* '200':
* description: OCR processing completed successfully
* '400':
* description: Bad request - OCR disabled or unsupported file type
* '404':
* description: Note not found
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * POST /api/ocr/process-note/{noteId} handler.
 *
 * Runs OCR for a single note. Responds with a [status, body] tuple when OCR
 * is disabled (400), the note does not exist (404), or the note's format is
 * unsupported (400); otherwise returns { success: true, result }.
 */
async function processNoteOCR(req: Request<{ noteId: string }>) {
    const noteId = req.params.noteId;
    const { language = 'eng', forceReprocess = false } = req.body || {};

    if (!ocrService.isOCREnabled()) {
        return [400, { success: false, message: 'OCR is not enabled in settings' }];
    }
    if (!becca.getNote(noteId)) {
        return [404, { success: false, message: 'Note not found' }];
    }

    const result = await ocrService.processNoteOCR(noteId, { language, forceReprocess });

    return result
        ? { success: true, result }
        : [400, { success: false, message: 'Note is not an image or has unsupported format' }];
}
/**
* @swagger
* /api/ocr/process-attachment/{attachmentId}:
* post:
* summary: Process OCR for a specific attachment
* operationId: ocr-process-attachment
* parameters:
* - name: attachmentId
* in: path
* required: true
* schema:
* type: string
* description: ID of the attachment to process
* requestBody:
* required: false
* content:
* application/json:
* schema:
* type: object
* properties:
* language:
* type: string
* description: OCR language code (e.g. 'eng', 'fra', 'deu')
* default: 'eng'
* forceReprocess:
* type: boolean
* description: Force reprocessing even if OCR already exists
* default: false
* responses:
* '200':
* description: OCR processing completed successfully
* '400':
* description: Bad request - OCR disabled or unsupported file type
* '404':
* description: Attachment not found
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * POST /api/ocr/process-attachment/{attachmentId} handler.
 *
 * Runs OCR for a single attachment. Responds with a [status, body] tuple
 * when OCR is disabled (400), the attachment does not exist (404), or its
 * format is unsupported (400); otherwise returns { success: true, result }.
 */
async function processAttachmentOCR(req: Request<{ attachmentId: string }>) {
    const attachmentId = req.params.attachmentId;
    const { language = 'eng', forceReprocess = false } = req.body || {};

    if (!ocrService.isOCREnabled()) {
        return [400, { success: false, message: 'OCR is not enabled in settings' }];
    }
    if (!becca.getAttachment(attachmentId)) {
        return [404, { success: false, message: 'Attachment not found' }];
    }

    const result = await ocrService.processAttachmentOCR(attachmentId, { language, forceReprocess });

    return result
        ? { success: true, result }
        : [400, { success: false, message: 'Attachment is not an image or has unsupported format' }];
}
/**
* @swagger
* /api/ocr/search:
* get:
* summary: Search for text in OCR results
* operationId: ocr-search
* parameters:
* - name: q
* in: query
* required: true
* schema:
* type: string
* description: Search query text
* responses:
* '200':
* description: Search results
* '400':
* description: Bad request - missing search query
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * GET /api/ocr/search handler.
 *
 * Looks up `q` in stored OCR results. Responds 400 when the query parameter
 * is missing or not a plain string; otherwise returns { success, results }.
 */
async function searchOCR(req: Request) {
    const searchText = req.query.q;

    // Express query values may be arrays/objects; only a non-empty string is accepted.
    if (typeof searchText !== 'string' || !searchText) {
        return [400, { success: false, message: 'Search query is required' }];
    }

    return { success: true, results: ocrService.searchOCRResults(searchText) };
}
/**
* @swagger
* /api/ocr/batch-process:
* post:
* summary: Process OCR for all images without existing OCR results
* operationId: ocr-batch-process
* responses:
* '200':
* description: Batch processing initiated successfully
* '400':
* description: Bad request - OCR disabled or already processing
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * POST /api/ocr/batch-process handler.
 * Delegates to the OCR service; a failed start (e.g. OCR disabled or a batch
 * already in progress) is surfaced as a 400 with the service's own payload.
 */
async function batchProcessOCR() {
const result = await ocrService.startBatchProcessing();

if (!result.success) {
return [400, result];
}

return result;
}
/**
* @swagger
* /api/ocr/batch-progress:
* get:
* summary: Get batch OCR processing progress
* operationId: ocr-batch-progress
* responses:
* '200':
* description: Batch processing progress information
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * GET /api/ocr/batch-progress handler.
 * Returns the OCR service's current batch-processing progress snapshot.
 */
async function getBatchProgress() {
return ocrService.getBatchProgress();
}
/**
* @swagger
* /api/ocr/stats:
* get:
* summary: Get OCR processing statistics
* operationId: ocr-get-stats
* responses:
* '200':
* description: OCR statistics
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * GET /api/ocr/stats handler.
 * Wraps the OCR service's statistics in a { success, stats } envelope.
 */
async function getOCRStats() {
return { success: true, stats: ocrService.getOCRStats() };
}
/**
* @swagger
* /api/ocr/delete/{blobId}:
* delete:
* summary: Delete OCR results for a specific blob
* operationId: ocr-delete-results
* parameters:
* - name: blobId
* in: path
* required: true
* schema:
* type: string
* description: ID of the blob
* responses:
* '200':
* description: OCR results deleted successfully
* '400':
* description: Bad request - invalid parameters
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
/**
 * DELETE /api/ocr/delete/{blobId} handler.
 * Removes stored OCR results for the given blob.
 *
 * NOTE(review): there is no existence check — success is reported even for
 * unknown blobIds; confirm that is intended.
 */
async function deleteOCRResults(req: Request<{ blobId: string }>) {
const { blobId } = req.params;

ocrService.deleteOCRResult(blobId);

return { success: true, message: `OCR results deleted for blob ${blobId}` };
}
/**
* @swagger
* /api/ocr/notes/{noteId}/text:
* get:
* summary: Get OCR text for a specific note
* operationId: ocr-get-note-text
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* description: Note ID to get OCR text for
* responses:
* '200':
* description: OCR text retrieved successfully
* '404':
* description: Note not found
* tags: ["ocr"]
*/
async function getNoteOCRText(req: Request<{ noteId: string }>) {
    const { noteId } = req.params;

    // Resolve the note first so we can 404 before touching the blobs table.
    const note = becca.getNote(noteId);
    if (!note) {
        return [404, { success: false, message: 'Note not found' }];
    }

    // Defaults describe "no OCR available"; overwritten below when a blob row exists.
    let ocrText: string | null = null;
    let extractedAt: string | null = null;

    if (note.blobId) {
        const row = sql.getRow<{
            textRepresentation: string | null;
            textExtractionLastProcessed: string | null;
        }>(`
        SELECT textRepresentation, textExtractionLastProcessed
        FROM blobs
        WHERE blobId = ?
    `, [note.blobId]);

        ocrText = row?.textRepresentation ?? null;
        extractedAt = row?.textExtractionLastProcessed ?? null;
    }

    return {
        success: true,
        text: ocrText || '',
        hasOcr: !!ocrText,
        extractedAt
    };
}
// Route handlers for the OCR REST API; wired to URLs by the route registry.
export default {
    processNoteOCR,
    processAttachmentOCR,
    searchOCR,
    batchProcessOCR,
    getBatchProgress,
    getOCRStats,
    deleteOCRResults,
    getNoteOCRText
};

View File

@@ -105,7 +105,13 @@ const ALLOWED_OPTIONS = new Set<OptionNames>([
"newLayout",
"mfaEnabled",
"mfaMethod",
"llmProviders"
"llmProviders",
// OCR options
"ocrEnabled",
"ocrLanguage",
"ocrAutoProcessImages",
"ocrMinConfidence"
]);
function getOptions() {

View File

@@ -39,6 +39,7 @@ import loginApiRoute from "./api/login.js";
import metricsRoute from "./api/metrics.js";
import noteMapRoute from "./api/note_map.js";
import notesApiRoute from "./api/notes.js";
import ocrRoute from "./api/ocr.js";
import optionsApiRoute from "./api/options.js";
import otherRoute from "./api/other.js";
import passwordApiRoute from "./api/password.js";
@@ -376,6 +377,16 @@ function register(app: express.Application) {
etapiBackupRoute.register(router);
etapiMetricsRoute.register(router);
// OCR API
asyncApiRoute(PST, "/api/ocr/process-note/:noteId", ocrRoute.processNoteOCR);
asyncApiRoute(PST, "/api/ocr/process-attachment/:attachmentId", ocrRoute.processAttachmentOCR);
asyncApiRoute(GET, "/api/ocr/search", ocrRoute.searchOCR);
asyncApiRoute(PST, "/api/ocr/batch-process", ocrRoute.batchProcessOCR);
asyncApiRoute(GET, "/api/ocr/batch-progress", ocrRoute.getBatchProgress);
asyncApiRoute(GET, "/api/ocr/stats", ocrRoute.getOCRStats);
asyncApiRoute(DEL, "/api/ocr/delete/:blobId", ocrRoute.deleteOCRResults);
asyncApiRoute(GET, "/api/ocr/notes/:noteId/text", ocrRoute.getNoteOCRText);
app.use("", router);
}

View File

@@ -5,7 +5,7 @@ import packageJson from "../../package.json" with { type: "json" };
import build from "./build.js";
import dataDir from "./data_dir.js";
const APP_DB_VERSION = 235;
const APP_DB_VERSION = 236;
const SYNC_VERSION = 37;
const CLIPPER_PROTOCOL_VERSION = "1.0";

View File

@@ -6,6 +6,9 @@ import becca from "../becca/becca.js";
import BAttribute from "../becca/entities/battribute.js";
import hiddenSubtreeService from "./hidden_subtree.js";
import oneTimeTimer from "./one_time_timer.js";
import ocrService from "./ocr/ocr_service.js";
import optionService from "./options.js";
import log from "./log.js";
import type BNote from "../becca/entities/bnote.js";
import type AbstractBeccaEntity from "../becca/entities/abstract_becca_entity.js";
import type { DefinitionObject } from "./promoted_attribute_definition_interface.js";
@@ -137,6 +140,25 @@ eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) =>
}
} else if (entityName === "notes") {
runAttachedRelations(entity, "runOnNoteCreation", entity);
// Note: OCR processing for images is now handled in image.ts during image processing
// OCR processing for files remains here since they don't go through image processing
// Only auto-process if both OCR is enabled and auto-processing is enabled
if (entity.type === 'file' && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages")) {
// Check if the file MIME type is supported by any OCR processor
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
if (entity.mime && supportedMimeTypes.includes(entity.mime)) {
// Process OCR asynchronously to avoid blocking note creation
ocrService.processNoteOCR(entity.noteId).then(result => {
if (result) {
log.info(`Automatically processed OCR for file note ${entity.noteId} with MIME type ${entity.mime}`);
}
}).catch(error => {
log.error(`Failed to automatically process OCR for file note ${entity.noteId}: ${error}`);
});
}
}
}
});

View File

@@ -18,8 +18,7 @@ export async function initializeTranslations() {
ns: "server",
backend: {
loadPath: join(resourceDir, "assets/translations/{{lng}}/{{ns}}.json")
},
showSupportNotice: false
}
});
// Initialize dayjs locale.

View File

@@ -12,8 +12,9 @@ import sanitizeFilename from "sanitize-filename";
import isSvg from "is-svg";
import isAnimated from "is-animated";
import htmlSanitizer from "./html_sanitizer.js";
import ocrService, { type OCRResult } from "./ocr/ocr_service.js";
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean) {
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean, noteId?: string) {
const compressImages = optionService.getOptionBool("compressImages");
const origImageFormat = await getImageType(uploadBuffer);
@@ -24,6 +25,42 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
shrinkImageSwitch = false;
}
// Schedule OCR processing in the background for best quality
// Only auto-process if both OCR is enabled and auto-processing is enabled
if (noteId && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) {
const imageMime = getImageMimeFromExtension(origImageFormat.ext);
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
if (supportedMimeTypes.includes(imageMime)) {
// Process OCR asynchronously without blocking image creation
setImmediate(async () => {
try {
const ocrResult = await ocrService.extractTextFromFile(uploadBuffer, imageMime);
if (ocrResult) {
// We need to get the entity again to get its blobId after it's been saved
// noteId could be either a note ID or attachment ID
const note = becca.getNote(noteId);
const attachment = becca.getAttachment(noteId);
let blobId: string | undefined;
if (note && note.blobId) {
blobId = note.blobId;
} else if (attachment && attachment.blobId) {
blobId = attachment.blobId;
}
if (blobId) {
await ocrService.storeOCRResult(blobId, ocrResult);
log.info(`Successfully processed OCR for image ${noteId} (${originalName})`);
}
}
} catch (error) {
log.error(`Failed to process OCR for image ${noteId}: ${error}`);
}
});
}
}
let finalImageBuffer;
let imageFormat;
@@ -72,7 +109,7 @@ function updateImage(noteId: string, uploadBuffer: Buffer, originalName: string)
note.setLabel("originalFileName", originalName);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, true).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, true, noteId).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
note.mime = getImageMimeFromExtension(imageFormat.ext);
note.save();
@@ -108,7 +145,7 @@ function saveImage(parentNoteId: string, uploadBuffer: Buffer, originalName: str
note.addLabel("originalFileName", originalName);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, shrinkImageSwitch).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, shrinkImageSwitch, note.noteId).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
note.mime = getImageMimeFromExtension(imageFormat.ext);
@@ -159,7 +196,7 @@ function saveImageToAttachment(noteId: string, uploadBuffer: Buffer, originalNam
}, 5000);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, !!shrinkImageSwitch).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, !!shrinkImageSwitch, attachment.attachmentId).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
// re-read, might be changed in the meantime
if (!attachment.attachmentId) {

View File

@@ -0,0 +1,823 @@
import { afterEach,beforeEach, describe, expect, it, vi } from 'vitest';
// Mock Tesseract.js so no real OCR engine is downloaded or run during tests.
const mockWorker = {
    recognize: vi.fn(),
    terminate: vi.fn(),
    reinitialize: vi.fn()
};
const mockTesseract = {
    createWorker: vi.fn().mockResolvedValue(mockWorker)
};
vi.mock('tesseract.js', () => ({
    default: mockTesseract
}));
// Mock dependencies
const mockOptions = {
    getOptionBool: vi.fn(),
    getOption: vi.fn()
};
const mockLog = {
    info: vi.fn(),
    error: vi.fn()
};
const mockSql = {
    execute: vi.fn(),
    getRow: vi.fn(),
    getRows: vi.fn()
};
const mockBecca = {
    getNote: vi.fn(),
    getAttachment: vi.fn()
};
vi.mock('../options.js', () => ({
    default: mockOptions
}));
vi.mock('../log.js', () => ({
    default: mockLog
}));
vi.mock('../sql.js', () => ({
    default: mockSql
}));
vi.mock('../../becca/becca.js', () => ({
    default: mockBecca
}));
// Import the service after mocking
// Holds the singleton service instance; (re)assigned via dynamic import in beforeEach.
let ocrService: typeof import('./ocr_service.js').default;
// Restore a pristine service + mock state before every test.
beforeEach(async () => {
    // Clear all mocks
    vi.clearAllMocks();
    // Reset mock implementations: OCR enabled, 'eng' language, empty DB by default.
    mockOptions.getOptionBool.mockReturnValue(true);
    mockOptions.getOption.mockReturnValue('eng');
    mockSql.execute.mockImplementation(() => ({ lastInsertRowid: 1 }));
    mockSql.getRow.mockReturnValue(null);
    mockSql.getRows.mockReturnValue([]);
    // Set up createWorker to properly set the worker on the service
    mockTesseract.createWorker.mockImplementation(async () => {
        return mockWorker;
    });
    // Dynamically import the service to ensure mocks are applied
    const module = await import('./ocr_service.js');
    ocrService = module.default; // It's an instance, not a class
    // Reset the OCR service state
    // (reaching into private fields through `any` — acceptable in tests only)
    (ocrService as any).isInitialized = false;
    (ocrService as any).worker = null;
    (ocrService as any).isProcessing = false;
    (ocrService as any).batchProcessingState = {
        inProgress: false,
        total: 0,
        processed: 0
    };
});
// Undo any spy/mock overrides installed by individual tests.
afterEach(() => {
    vi.restoreAllMocks();
});
describe('OCRService', () => {
describe('isOCREnabled', () => {
it('should return true when OCR is enabled in options', () => {
mockOptions.getOptionBool.mockReturnValue(true);
expect(ocrService.isOCREnabled()).toBe(true);
expect(mockOptions.getOptionBool).toHaveBeenCalledWith('ocrEnabled');
});
it('should return false when OCR is disabled in options', () => {
mockOptions.getOptionBool.mockReturnValue(false);
expect(ocrService.isOCREnabled()).toBe(false);
expect(mockOptions.getOptionBool).toHaveBeenCalledWith('ocrEnabled');
});
it('should return false when options throws an error', () => {
mockOptions.getOptionBool.mockImplementation(() => {
throw new Error('Options not available');
});
expect(ocrService.isOCREnabled()).toBe(false);
});
});
describe('isSupportedMimeType', () => {
it('should return true for supported image MIME types', () => {
expect(ocrService.isSupportedMimeType('image/jpeg')).toBe(true);
expect(ocrService.isSupportedMimeType('image/jpg')).toBe(true);
expect(ocrService.isSupportedMimeType('image/png')).toBe(true);
expect(ocrService.isSupportedMimeType('image/gif')).toBe(true);
expect(ocrService.isSupportedMimeType('image/bmp')).toBe(true);
expect(ocrService.isSupportedMimeType('image/tiff')).toBe(true);
});
it('should return false for unsupported MIME types', () => {
expect(ocrService.isSupportedMimeType('text/plain')).toBe(false);
expect(ocrService.isSupportedMimeType('application/pdf')).toBe(false);
expect(ocrService.isSupportedMimeType('video/mp4')).toBe(false);
expect(ocrService.isSupportedMimeType('audio/mp3')).toBe(false);
});
it('should handle null/undefined MIME types', () => {
expect(ocrService.isSupportedMimeType(null as any)).toBe(false);
expect(ocrService.isSupportedMimeType(undefined as any)).toBe(false);
expect(ocrService.isSupportedMimeType('')).toBe(false);
});
});
describe('extractTextFromFile', () => {
const mockImageBuffer = Buffer.from('fake-image-data');
it('should extract text successfully with default options', async () => {
const mockResult = {
data: {
text: 'Extracted text from image',
confidence: 95
}
};
mockWorker.recognize.mockResolvedValue(mockResult);
const result = await ocrService.extractTextFromFile(mockImageBuffer, 'image/jpeg');
expect(result).toBeDefined();
expect(result.text).toBe('Extracted text from image');
expect(result.extractedAt).toEqual(expect.any(String));
});
it('should handle OCR recognition errors', async () => {
const error = new Error('OCR recognition failed');
mockWorker.recognize.mockRejectedValue(error);
await expect(ocrService.extractTextFromFile(mockImageBuffer, 'image/jpeg')).rejects.toThrow('OCR recognition failed');
expect(mockLog.error).toHaveBeenCalledWith('OCR text extraction failed: Error: OCR recognition failed');
});
});
describe('storeOCRResult', () => {
it('should store OCR result in blob successfully', async () => {
const ocrResult = {
text: 'Sample text',
confidence: 0.95,
extractedAt: '2025-06-10T10:00:00.000Z',
language: 'eng'
};
await ocrService.storeOCRResult('blob123', ocrResult);
expect(mockSql.execute).toHaveBeenCalledWith(
expect.stringContaining('UPDATE blobs SET textRepresentation = ?'),
['Sample text', 'blob123']
);
});
it('should handle undefined blobId gracefully', async () => {
const ocrResult = {
text: 'Sample text',
confidence: 0.95,
extractedAt: '2025-06-10T10:00:00.000Z',
language: 'eng'
};
await ocrService.storeOCRResult(undefined, ocrResult);
expect(mockSql.execute).not.toHaveBeenCalled();
expect(mockLog.error).toHaveBeenCalledWith('Cannot store OCR result: blobId is undefined');
});
it('should handle database update errors', async () => {
const error = new Error('Database error');
mockSql.execute.mockImplementation(() => {
throw error;
});
const ocrResult = {
text: 'Sample text',
confidence: 0.95,
extractedAt: '2025-06-10T10:00:00.000Z',
language: 'eng'
};
await expect(ocrService.storeOCRResult('blob123', ocrResult)).rejects.toThrow('Database error');
expect(mockLog.error).toHaveBeenCalledWith('Failed to store OCR result for blob blob123: Error: Database error');
});
});
describe('processNoteOCR', () => {
const mockNote = {
noteId: 'note123',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob123',
getContent: vi.fn()
};
beforeEach(() => {
mockBecca.getNote.mockReturnValue(mockNote);
mockNote.getContent.mockReturnValue(Buffer.from('fake-image-data'));
});
it('should process note OCR successfully', async () => {
// Ensure getRow returns null for all calls in this test
mockSql.getRow.mockImplementation(() => null);
const mockOCRResult = {
data: {
text: 'Note image text',
confidence: 90
}
};
mockWorker.recognize.mockResolvedValue(mockOCRResult);
const result = await ocrService.processNoteOCR('note123');
expect(result).toEqual({
text: 'Note image text',
confidence: 0.9,
extractedAt: expect.any(String),
language: 'eng'
});
expect(mockBecca.getNote).toHaveBeenCalledWith('note123');
expect(mockNote.getContent).toHaveBeenCalled();
});
it('should return existing OCR result if forceReprocess is false', async () => {
const existingResult = {
textRepresentation: 'Existing text'
};
mockSql.getRow.mockReturnValue(existingResult);
const result = await ocrService.processNoteOCR('note123');
expect(result).toEqual({
text: 'Existing text',
confidence: 0.95,
language: 'eng',
extractedAt: expect.any(String)
});
expect(mockNote.getContent).not.toHaveBeenCalled();
});
it('should reprocess if forceReprocess is true', async () => {
const existingResult = {
textRepresentation: 'Existing text'
};
mockSql.getRow.mockResolvedValue(existingResult);
const mockOCRResult = {
data: {
text: 'New processed text',
confidence: 95
}
};
mockWorker.recognize.mockResolvedValue(mockOCRResult);
const result = await ocrService.processNoteOCR('note123', { forceReprocess: true });
expect(result?.text).toBe('New processed text');
expect(mockNote.getContent).toHaveBeenCalled();
});
it('should return null for non-existent note', async () => {
mockBecca.getNote.mockReturnValue(null);
const result = await ocrService.processNoteOCR('nonexistent');
expect(result).toBe(null);
expect(mockLog.error).toHaveBeenCalledWith('Note nonexistent not found');
});
it('should return null for unsupported MIME type', async () => {
mockNote.mime = 'text/plain';
const result = await ocrService.processNoteOCR('note123');
expect(result).toBe(null);
expect(mockLog.info).toHaveBeenCalledWith('Note note123 has unsupported MIME type text/plain, skipping OCR');
});
});
describe('processAttachmentOCR', () => {
const mockAttachment = {
attachmentId: 'attach123',
role: 'image',
mime: 'image/png',
blobId: 'blob456',
getContent: vi.fn()
};
beforeEach(() => {
mockBecca.getAttachment.mockReturnValue(mockAttachment);
mockAttachment.getContent.mockReturnValue(Buffer.from('fake-image-data'));
});
it('should process attachment OCR successfully', async () => {
// Ensure getRow returns null for all calls in this test
mockSql.getRow.mockImplementation(() => null);
const mockOCRResult = {
data: {
text: 'Attachment image text',
confidence: 92
}
};
mockWorker.recognize.mockResolvedValue(mockOCRResult);
const result = await ocrService.processAttachmentOCR('attach123');
expect(result).toEqual({
text: 'Attachment image text',
confidence: 0.92,
extractedAt: expect.any(String),
language: 'eng'
});
expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach123');
});
it('should return null for non-existent attachment', async () => {
mockBecca.getAttachment.mockReturnValue(null);
const result = await ocrService.processAttachmentOCR('nonexistent');
expect(result).toBe(null);
expect(mockLog.error).toHaveBeenCalledWith('Attachment nonexistent not found');
});
});
describe('searchOCRResults', () => {
it('should search OCR results successfully', () => {
const mockResults = [
{
blobId: 'blob1',
textRepresentation: 'Sample search text'
}
];
mockSql.getRows.mockReturnValue(mockResults);
const results = ocrService.searchOCRResults('search');
expect(results).toEqual([{
blobId: 'blob1',
text: 'Sample search text'
}]);
expect(mockSql.getRows).toHaveBeenCalledWith(
expect.stringContaining('WHERE textRepresentation LIKE ?'),
['%search%']
);
});
it('should handle search errors gracefully', () => {
mockSql.getRows.mockImplementation(() => {
throw new Error('Database error');
});
const results = ocrService.searchOCRResults('search');
expect(results).toEqual([]);
expect(mockLog.error).toHaveBeenCalledWith('Failed to search OCR results: Error: Database error');
});
});
describe('getOCRStats', () => {
it('should return OCR statistics successfully', () => {
const mockStats = {
total_processed: 150
};
const mockNoteStats = {
count: 100
};
const mockAttachmentStats = {
count: 50
};
mockSql.getRow.mockReturnValueOnce(mockStats);
mockSql.getRow.mockReturnValueOnce(mockNoteStats);
mockSql.getRow.mockReturnValueOnce(mockAttachmentStats);
const stats = ocrService.getOCRStats();
expect(stats).toEqual({
totalProcessed: 150,
imageNotes: 100,
imageAttachments: 50
});
});
it('should handle missing statistics gracefully', () => {
mockSql.getRow.mockReturnValue(null);
const stats = ocrService.getOCRStats();
expect(stats).toEqual({
totalProcessed: 0,
imageNotes: 0,
imageAttachments: 0
});
});
});
describe('Batch Processing', () => {
describe('startBatchProcessing', () => {
beforeEach(() => {
// Reset batch processing state
ocrService.cancelBatchProcessing();
});
it('should start batch processing when images are available', async () => {
mockSql.getRow.mockReturnValueOnce({ count: 5 }); // image notes
mockSql.getRow.mockReturnValueOnce({ count: 3 }); // image attachments
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({ success: true });
expect(mockSql.getRow).toHaveBeenCalledTimes(2);
});
it('should return error if batch processing already in progress', async () => {
// Start first batch
mockSql.getRow.mockReturnValueOnce({ count: 5 });
mockSql.getRow.mockReturnValueOnce({ count: 3 });
// Mock background processing queries
const mockImageNotes = Array.from({length: 5}, (_, i) => ({
noteId: `note${i}`,
mime: 'image/jpeg'
}));
mockSql.getRows.mockReturnValueOnce(mockImageNotes);
mockSql.getRows.mockReturnValueOnce([]);
// Start without awaiting to keep it in progress
const firstStart = ocrService.startBatchProcessing();
// Try to start second batch immediately
const result = await ocrService.startBatchProcessing();
// Clean up by awaiting the first one
await firstStart;
expect(result).toEqual({
success: false,
message: 'Batch processing already in progress'
});
});
it('should return error if OCR is disabled', async () => {
mockOptions.getOptionBool.mockReturnValue(false);
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({
success: false,
message: 'OCR is disabled'
});
});
it('should return error if no images need processing', async () => {
mockSql.getRow.mockReturnValueOnce({ count: 0 }); // image notes
mockSql.getRow.mockReturnValueOnce({ count: 0 }); // image attachments
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({
success: false,
message: 'No images found that need OCR processing'
});
});
it('should handle database errors gracefully', async () => {
const error = new Error('Database connection failed');
mockSql.getRow.mockImplementation(() => {
throw error;
});
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({
success: false,
message: 'Database connection failed'
});
expect(mockLog.error).toHaveBeenCalledWith(
'Failed to start batch processing: Database connection failed'
);
});
});
describe('getBatchProgress', () => {
it('should return initial progress state', () => {
const progress = ocrService.getBatchProgress();
expect(progress.inProgress).toBe(false);
expect(progress.total).toBe(0);
expect(progress.processed).toBe(0);
});
it('should return progress with percentage when total > 0', async () => {
// Start batch processing
mockSql.getRow.mockReturnValueOnce({ count: 10 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
// Mock the background processing queries to return items that will take time to process
const mockImageNotes = Array.from({length: 10}, (_, i) => ({
noteId: `note${i}`,
mime: 'image/jpeg'
}));
mockSql.getRows.mockReturnValueOnce(mockImageNotes); // image notes query
mockSql.getRows.mockReturnValueOnce([]); // image attachments query
const startPromise = ocrService.startBatchProcessing();
// Check progress immediately after starting (before awaiting)
const progress = ocrService.getBatchProgress();
await startPromise;
expect(progress.inProgress).toBe(true);
expect(progress.total).toBe(10);
expect(progress.processed).toBe(0);
expect(progress.percentage).toBe(0);
expect(progress.startTime).toBeInstanceOf(Date);
});
});
describe('cancelBatchProcessing', () => {
it('should cancel ongoing batch processing', async () => {
// Start batch processing
mockSql.getRow.mockReturnValueOnce({ count: 5 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
// Mock background processing queries
const mockImageNotes = Array.from({length: 5}, (_, i) => ({
noteId: `note${i}`,
mime: 'image/jpeg'
}));
mockSql.getRows.mockReturnValueOnce(mockImageNotes);
mockSql.getRows.mockReturnValueOnce([]);
const startPromise = ocrService.startBatchProcessing();
expect(ocrService.getBatchProgress().inProgress).toBe(true);
await startPromise;
ocrService.cancelBatchProcessing();
expect(ocrService.getBatchProgress().inProgress).toBe(false);
expect(mockLog.info).toHaveBeenCalledWith('Batch OCR processing cancelled');
});
it('should do nothing if no batch processing is running', () => {
ocrService.cancelBatchProcessing();
expect(mockLog.info).not.toHaveBeenCalledWith('Batch OCR processing cancelled');
});
});
describe('processBatchInBackground', () => {
it('should process image notes and attachments in sequence', async () => {
// Clear all mocks at the start of this test to ensure clean state
vi.clearAllMocks();
// Mock data for batch processing
const imageNotes = [
{ noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' },
{ noteId: 'note2', mime: 'image/png', blobId: 'blob2' }
];
const imageAttachments = [
{ attachmentId: 'attach1', mime: 'image/gif', blobId: 'blob3' }
];
// Setup mocks for startBatchProcessing
mockSql.getRow.mockReturnValueOnce({ count: 2 }); // image notes count
mockSql.getRow.mockReturnValueOnce({ count: 1 }); // image attachments count
// Setup mocks for background processing
mockSql.getRows.mockReturnValueOnce(imageNotes); // image notes query
mockSql.getRows.mockReturnValueOnce(imageAttachments); // image attachments query
// Mock successful OCR processing
mockWorker.recognize.mockResolvedValue({
data: { text: 'Test text', confidence: 95 }
});
// Mock notes and attachments
const mockNote1 = {
noteId: 'note1',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob1',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
const mockNote2 = {
noteId: 'note2',
type: 'image',
mime: 'image/png',
blobId: 'blob2',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
const mockAttachment = {
attachmentId: 'attach1',
role: 'image',
mime: 'image/gif',
blobId: 'blob3',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
mockBecca.getNote.mockImplementation((noteId) => {
if (noteId === 'note1') return mockNote1;
if (noteId === 'note2') return mockNote2;
return null;
});
mockBecca.getAttachment.mockReturnValue(mockAttachment);
mockSql.getRow.mockReturnValue(null); // No existing OCR results
// Start batch processing
await ocrService.startBatchProcessing();
// Wait for background processing to complete
// Need to wait longer since there's a 500ms delay between each item in batch processing
await new Promise(resolve => setTimeout(resolve, 2000));
// Verify notes and attachments were processed
expect(mockBecca.getNote).toHaveBeenCalledWith('note1');
expect(mockBecca.getNote).toHaveBeenCalledWith('note2');
expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach1');
});
it('should handle processing errors gracefully', async () => {
const imageNotes = [
{ noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' }
];
// Setup mocks for startBatchProcessing
mockSql.getRow.mockReturnValueOnce({ count: 1 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
// Setup mocks for background processing
mockSql.getRows.mockReturnValueOnce(imageNotes);
mockSql.getRows.mockReturnValueOnce([]);
// Mock note that will cause an error
const mockNote = {
noteId: 'note1',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob1',
getContent: vi.fn().mockImplementation(() => { throw new Error('Failed to get content'); })
};
mockBecca.getNote.mockReturnValue(mockNote);
mockSql.getRow.mockReturnValue(null);
// Start batch processing
await ocrService.startBatchProcessing();
// Wait for background processing to complete
await new Promise(resolve => setTimeout(resolve, 100));
// Verify error was logged but processing continued
expect(mockLog.error).toHaveBeenCalledWith(
expect.stringContaining('Failed to process OCR for note note1')
);
});
it('should stop processing when cancelled', async () => {
const imageNotes = [
{ noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' },
{ noteId: 'note2', mime: 'image/png', blobId: 'blob2' }
];
// Setup mocks
mockSql.getRow.mockReturnValueOnce({ count: 2 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
mockSql.getRows.mockReturnValueOnce(imageNotes);
mockSql.getRows.mockReturnValueOnce([]);
// Start batch processing
await ocrService.startBatchProcessing();
// Cancel immediately
ocrService.cancelBatchProcessing();
// Wait for background processing to complete
await new Promise(resolve => setTimeout(resolve, 100));
// Verify processing was stopped early
expect(ocrService.getBatchProgress().inProgress).toBe(false);
});
it('should skip unsupported MIME types', async () => {
const imageNotes = [
{ noteId: 'note1', mime: 'text/plain', blobId: 'blob1' }, // unsupported
{ noteId: 'note2', mime: 'image/jpeg', blobId: 'blob2' } // supported
];
// Setup mocks
mockSql.getRow.mockReturnValueOnce({ count: 2 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
mockSql.getRows.mockReturnValueOnce(imageNotes);
mockSql.getRows.mockReturnValueOnce([]);
const mockNote = {
noteId: 'note2',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob2',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
mockBecca.getNote.mockReturnValue(mockNote);
mockSql.getRow.mockReturnValue(null);
mockWorker.recognize.mockResolvedValue({
data: { text: 'Test text', confidence: 95 }
});
// Start batch processing
await ocrService.startBatchProcessing();
// Wait for background processing to complete
await new Promise(resolve => setTimeout(resolve, 100));
// Verify only supported MIME type was processed
expect(mockBecca.getNote).toHaveBeenCalledWith('note2');
expect(mockBecca.getNote).not.toHaveBeenCalledWith('note1');
});
});
});
describe('deleteOCRResult', () => {
it('should delete OCR result successfully', () => {
ocrService.deleteOCRResult('blob123');
expect(mockSql.execute).toHaveBeenCalledWith(
expect.stringContaining('UPDATE blobs SET textRepresentation = NULL'),
['blob123']
);
expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for blob blob123');
});
it('should handle deletion errors', () => {
mockSql.execute.mockImplementation(() => {
throw new Error('Database error');
});
expect(() => ocrService.deleteOCRResult('blob123')).toThrow('Database error');
expect(mockLog.error).toHaveBeenCalledWith('Failed to delete OCR result for blob blob123: Error: Database error');
});
});
describe('isCurrentlyProcessing', () => {
it('should return false initially', () => {
expect(ocrService.isCurrentlyProcessing()).toBe(false);
});
it('should return true during processing', async () => {
mockBecca.getNote.mockReturnValue({
noteId: 'note123',
mime: 'image/jpeg',
blobId: 'blob123',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
});
mockSql.getRow.mockResolvedValue(null);
mockWorker.recognize.mockImplementation(() => {
expect(ocrService.isCurrentlyProcessing()).toBe(true);
return Promise.resolve({
data: { text: 'test', confidence: 90 }
});
});
await ocrService.processNoteOCR('note123');
expect(ocrService.isCurrentlyProcessing()).toBe(false);
});
});
describe('cleanup', () => {
it('should terminate worker on cleanup', async () => {
await ocrService.cleanup();
expect(mockWorker.terminate).toHaveBeenCalled();
expect(mockLog.info).toHaveBeenCalledWith('OCR service cleaned up');
});
it('should handle cleanup when worker is not initialized', async () => {
await ocrService.cleanup();
expect(mockWorker.terminate).not.toHaveBeenCalled();
expect(mockLog.info).toHaveBeenCalledWith('OCR service cleaned up');
});
});
});

View File

@@ -0,0 +1,752 @@
import Tesseract from 'tesseract.js';
import log from '../log.js';
import sql from '../sql.js';
import becca from '../../becca/becca.js';
import options from '../options.js';
import { ImageProcessor } from './processors/image_processor.js';
import { PDFProcessor } from './processors/pdf_processor.js';
import { TIFFProcessor } from './processors/tiff_processor.js';
import { OfficeProcessor } from './processors/office_processor.js';
import { FileProcessor } from './processors/file_processor.js';
/** Result of one text-extraction run over a single file/blob. */
export interface OCRResult {
    /** The recognized text. */
    text: string;
    /** Recognition confidence reported by the processor (logged as a percentage). */
    confidence: number;
    /** Timestamp string recording when the extraction happened. */
    extractedAt: string;
    /** Language code used for recognition, if known (e.g. 'eng'). */
    language?: string;
    /** Page count for multi-page formats (PDF/TIFF) — presumably set by those processors; confirm. */
    pageCount?: number;
}
/** Per-call options accepted by the OCR entry points (extractTextFromFile, processNoteOCR, ...). */
export interface OCRProcessingOptions {
    /** Recognition language code — presumably overrides the 'ocrLanguage' option; confirm. */
    language?: string;
    /** When true, re-run OCR even if a stored result already exists for the blob. */
    forceReprocess?: boolean;
    /** Minimum confidence threshold — assumed used to filter low-quality text; confirm. */
    confidence?: number;
    /** Whether to try extracting embedded text from PDFs instead of rasterizing — verify against PDFProcessor. */
    enablePDFTextExtraction?: boolean;
}
/** Shape of rows read from the `blobs` table for OCR bookkeeping. */
interface OCRBlobRow {
    /** Primary key of the blob row. */
    blobId: string;
    /** Extracted text stored alongside the blob content. */
    textRepresentation: string;
    /** Timestamp of the last extraction run, when recorded. */
    textExtractionLastProcessed?: string;
}
/**
* OCR Service for extracting text from images and other OCR-able objects
* Uses Tesseract.js for text recognition
*/
class OCRService {
    // NOTE(review): this field is never assigned anywhere in this class —
    // recognition workers live inside ImageProcessor. cleanup()'s terminate
    // branch only fires if something external (tests) injects a worker;
    // confirm whether the field is still needed.
    private worker: Tesseract.Worker | null = null;
    // Raised while extractTextFromFile() runs; exposed via isCurrentlyProcessing().
    private isProcessing = false;
    // Registered format handlers, keyed by a short type id.
    private processors: Map<string, FileProcessor> = new Map();
    constructor() {
        // Initialize file processors
        this.processors.set('image', new ImageProcessor());
        this.processors.set('pdf', new PDFProcessor());
        this.processors.set('tiff', new TIFFProcessor());
        this.processors.set('office', new OfficeProcessor());
    }
    /**
     * Check if OCR is enabled in settings.
     * Returns false (rather than throwing) when the option cannot be read,
     * so callers can treat "unknown" as "disabled".
     */
    isOCREnabled(): boolean {
        try {
            return options.getOptionBool('ocrEnabled');
        } catch (error) {
            log.error(`Failed to check OCR enabled status: ${error}`);
            return false;
        }
    }
    /**
     * Check if a MIME type is supported for OCR.
     * This list covers raster image types only; other formats (PDF, Office)
     * are routed through getProcessorForMimeType() instead.
     */
    isSupportedMimeType(mimeType: string): boolean {
        if (!mimeType || typeof mimeType !== 'string') {
            return false;
        }
        const supportedTypes = [
            'image/jpeg',
            'image/jpg',
            'image/png',
            'image/gif',
            'image/bmp',
            'image/tiff',
            'image/webp'
        ];
        return supportedTypes.includes(mimeType.toLowerCase());
    }
    /**
     * Extract text from file buffer using appropriate processor.
     *
     * @param fileBuffer raw file contents
     * @param mimeType   used to select the processor
     * @param options    per-run extraction options
     * @throws when no processor handles the MIME type, or extraction fails
     */
    async extractTextFromFile(fileBuffer: Buffer, mimeType: string, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info(`Starting OCR text extraction for MIME type: ${mimeType}`);
            this.isProcessing = true;
            // Find appropriate processor
            const processor = this.getProcessorForMimeType(mimeType);
            if (!processor) {
                throw new Error(`No processor found for MIME type: ${mimeType}`);
            }
            const result = await processor.extractText(fileBuffer, options);
            log.info(`OCR extraction completed. Confidence: ${result.confidence}%, Text length: ${result.text.length}`);
            return result;
        } catch (error) {
            log.error(`OCR text extraction failed: ${error}`);
            throw error;
        } finally {
            // Always clear the busy flag, even on failure.
            this.isProcessing = false;
        }
    }
    /**
     * Process OCR for a note (image type).
     *
     * Returns null when OCR is disabled, the note is missing, or its
     * type/MIME is unsupported; returns the cached result when the stored
     * text is still up-to-date (unless forceReprocess is set).
     */
    async processNoteOCR(noteId: string, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
        if (!this.isOCREnabled()) {
            log.info('OCR is disabled in settings');
            return null;
        }
        const note = becca.getNote(noteId);
        if (!note) {
            log.error(`Note ${noteId} not found`);
            return null;
        }
        // Check if note type and MIME type are supported for OCR
        if (note.type === 'image') {
            if (!this.isSupportedMimeType(note.mime)) {
                log.info(`Image note ${noteId} has unsupported MIME type ${note.mime}, skipping OCR`);
                return null;
            }
        } else if (note.type === 'file') {
            // Check if file MIME type is supported by any processor
            const processor = this.getProcessorForMimeType(note.mime);
            if (!processor) {
                log.info(`File note ${noteId} has unsupported MIME type ${note.mime} for OCR, skipping`);
                return null;
            }
        } else {
            log.info(`Note ${noteId} is not an image or file note, skipping OCR`);
            return null;
        }
        // Check if OCR already exists and is up-to-date
        const existingOCR = this.getStoredOCRResult(note.blobId);
        if (existingOCR && !options.forceReprocess && note.blobId && !this.needsReprocessing(note.blobId)) {
            log.info(`OCR already exists and is up-to-date for note ${noteId}, returning cached result`);
            return existingOCR;
        }
        try {
            const content = note.getContent();
            if (!content || !(content instanceof Buffer)) {
                throw new Error(`Cannot get image content for note ${noteId}`);
            }
            const ocrResult = await this.extractTextFromFile(content, note.mime, options);
            // Store OCR result in blob
            await this.storeOCRResult(note.blobId, ocrResult);
            return ocrResult;
        } catch (error) {
            log.error(`Failed to process OCR for note ${noteId}: ${error}`);
            throw error;
        }
    }
    /**
     * Process OCR for an attachment.
     * Mirrors processNoteOCR(), but keys off attachment.role instead of
     * note.type.
     */
    async processAttachmentOCR(attachmentId: string, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
        if (!this.isOCREnabled()) {
            log.info('OCR is disabled in settings');
            return null;
        }
        const attachment = becca.getAttachment(attachmentId);
        if (!attachment) {
            log.error(`Attachment ${attachmentId} not found`);
            return null;
        }
        // Check if attachment role and MIME type are supported for OCR
        if (attachment.role === 'image') {
            if (!this.isSupportedMimeType(attachment.mime)) {
                log.info(`Image attachment ${attachmentId} has unsupported MIME type ${attachment.mime}, skipping OCR`);
                return null;
            }
        } else if (attachment.role === 'file') {
            // Check if file MIME type is supported by any processor
            const processor = this.getProcessorForMimeType(attachment.mime);
            if (!processor) {
                log.info(`File attachment ${attachmentId} has unsupported MIME type ${attachment.mime} for OCR, skipping`);
                return null;
            }
        } else {
            log.info(`Attachment ${attachmentId} is not an image or file, skipping OCR`);
            return null;
        }
        // Check if OCR already exists and is up-to-date
        const existingOCR = this.getStoredOCRResult(attachment.blobId);
        if (existingOCR && !options.forceReprocess && attachment.blobId && !this.needsReprocessing(attachment.blobId)) {
            log.info(`OCR already exists and is up-to-date for attachment ${attachmentId}, returning cached result`);
            return existingOCR;
        }
        try {
            const content = attachment.getContent();
            if (!content || !(content instanceof Buffer)) {
                throw new Error(`Cannot get image content for attachment ${attachmentId}`);
            }
            const ocrResult = await this.extractTextFromFile(content, attachment.mime, options);
            // Store OCR result in blob
            await this.storeOCRResult(attachment.blobId, ocrResult);
            return ocrResult;
        } catch (error) {
            log.error(`Failed to process OCR for attachment ${attachmentId}: ${error}`);
            throw error;
        }
    }
    /**
     * Store OCR result in blob.
     * Logs and returns (no throw) when blobId is undefined; rethrows on
     * SQL failure.
     */
    async storeOCRResult(blobId: string | undefined, ocrResult: OCRResult): Promise<void> {
        if (!blobId) {
            log.error('Cannot store OCR result: blobId is undefined');
            return;
        }
        try {
            // Store OCR text and timestamp in blobs table
            sql.execute(`
                UPDATE blobs SET
                    textRepresentation = ?,
                    textExtractionLastProcessed = ?
                WHERE blobId = ?
            `, [
                ocrResult.text,
                new Date().toISOString(),
                blobId
            ]);
            log.info(`Stored OCR result for blob ${blobId}`);
        } catch (error) {
            log.error(`Failed to store OCR result for blob ${blobId}: ${error}`);
            throw error;
        }
    }
    /**
     * Get stored OCR result from blob.
     * Only the text survives storage, so the returned confidence/language/
     * extractedAt are synthesized defaults, not the original values.
     */
    private getStoredOCRResult(blobId: string | undefined): OCRResult | null {
        if (!blobId) {
            return null;
        }
        try {
            const row = sql.getRow<{
                textRepresentation: string | null;
            }>(`
                SELECT textRepresentation
                FROM blobs
                WHERE blobId = ?
            `, [blobId]);
            if (!row || !row.textRepresentation) {
                return null;
            }
            // Return basic OCR result from stored text
            // Note: we lose confidence, language, and extractedAt metadata
            // but gain simplicity by storing directly in blob
            return {
                text: row.textRepresentation,
                confidence: 0.95, // Default high confidence for existing OCR
                extractedAt: new Date().toISOString(),
                language: 'eng'
            };
        } catch (error) {
            log.error(`Failed to get OCR result for blob ${blobId}: ${error}`);
            return null;
        }
    }
    /**
     * Search for text in OCR results.
     * Substring (LIKE %…%) match over stored text; returns [] on failure.
     */
    searchOCRResults(searchText: string): Array<{ blobId: string; text: string }> {
        try {
            const query = `
                SELECT blobId, textRepresentation
                FROM blobs
                WHERE textRepresentation LIKE ?
                AND textRepresentation IS NOT NULL
            `;
            const params = [`%${searchText}%`];
            const rows = sql.getRows<OCRBlobRow>(query, params);
            return rows.map(row => ({
                blobId: row.blobId,
                text: row.textRepresentation
            }));
        } catch (error) {
            log.error(`Failed to search OCR results: ${error}`);
            return [];
        }
    }
    /**
     * Delete OCR results for a blob.
     * Clears only textRepresentation; the textExtractionLastProcessed stamp
     * is left intact (see invalidateOCRResult for clearing both).
     */
    deleteOCRResult(blobId: string): void {
        try {
            sql.execute(`
                UPDATE blobs SET textRepresentation = NULL
                WHERE blobId = ?
            `, [blobId]);
            log.info(`Deleted OCR result for blob ${blobId}`);
        } catch (error) {
            log.error(`Failed to delete OCR result for blob ${blobId}: ${error}`);
            throw error;
        }
    }
    /**
     * Process OCR for all files that don't have OCR results yet or need reprocessing.
     * Thin alias kept for backward compatibility.
     */
    async processAllImages(): Promise<void> {
        return this.processAllBlobsNeedingOCR();
    }
    /**
     * Get OCR statistics: total processed blobs plus per-entity image counts.
     * Returns zeroed stats on query failure.
     */
    getOCRStats(): { totalProcessed: number; imageNotes: number; imageAttachments: number } {
        try {
            const stats = sql.getRow<{
                total_processed: number;
            }>(`
                SELECT COUNT(*) as total_processed
                FROM blobs
                WHERE textRepresentation IS NOT NULL AND textRepresentation != ''
            `);
            // Count image notes with OCR
            const noteStats = sql.getRow<{
                count: number;
            }>(`
                SELECT COUNT(*) as count
                FROM notes n
                JOIN blobs b ON n.blobId = b.blobId
                WHERE n.type = 'image'
                AND n.isDeleted = 0
                AND b.textRepresentation IS NOT NULL AND b.textRepresentation != ''
            `);
            // Count image attachments with OCR
            const attachmentStats = sql.getRow<{
                count: number;
            }>(`
                SELECT COUNT(*) as count
                FROM attachments a
                JOIN blobs b ON a.blobId = b.blobId
                WHERE a.role = 'image'
                AND a.isDeleted = 0
                AND b.textRepresentation IS NOT NULL AND b.textRepresentation != ''
            `);
            return {
                totalProcessed: stats?.total_processed || 0,
                imageNotes: noteStats?.count || 0,
                imageAttachments: attachmentStats?.count || 0
            };
        } catch (error) {
            log.error(`Failed to get OCR stats: ${error}`);
            return { totalProcessed: 0, imageNotes: 0, imageAttachments: 0 };
        }
    }
    /**
     * Clean up OCR service.
     * Terminates the worker if one was ever attached (see field note above).
     */
    async cleanup(): Promise<void> {
        if (this.worker) {
            await this.worker.terminate();
            this.worker = null;
        }
        log.info('OCR service cleaned up');
    }
    /**
     * Check if currently processing (i.e. inside extractTextFromFile).
     */
    isCurrentlyProcessing(): boolean {
        return this.isProcessing;
    }
    // Batch processing state. `processed` counts failures too, so the
    // progress percentage always reaches 100%.
    private batchProcessingState: {
        inProgress: boolean;
        total: number;
        processed: number;
        startTime?: Date;
    } = {
        inProgress: false,
        total: 0,
        processed: 0
    };
    /**
     * Start batch OCR processing with progress tracking.
     * Resolves as soon as the background loop is kicked off; progress is
     * polled via getBatchProgress().
     */
    async startBatchProcessing(): Promise<{ success: boolean; message?: string }> {
        if (this.batchProcessingState.inProgress) {
            return { success: false, message: 'Batch processing already in progress' };
        }
        if (!this.isOCREnabled()) {
            return { success: false, message: 'OCR is disabled' };
        }
        try {
            // Count total blobs needing OCR processing
            const blobsNeedingOCR = this.getBlobsNeedingOCR();
            const totalCount = blobsNeedingOCR.length;
            if (totalCount === 0) {
                return { success: false, message: 'No images found that need OCR processing' };
            }
            // Initialize batch processing state
            this.batchProcessingState = {
                inProgress: true,
                total: totalCount,
                processed: 0,
                startTime: new Date()
            };
            // Start processing in background (fire-and-forget; errors are
            // logged and flip inProgress off so a retry is possible)
            this.processBatchInBackground(blobsNeedingOCR).catch(error => {
                log.error(`Batch processing failed: ${error instanceof Error ? error.message : String(error)}`);
                this.batchProcessingState.inProgress = false;
            });
            return { success: true };
        } catch (error) {
            log.error(`Failed to start batch processing: ${error instanceof Error ? error.message : String(error)}`);
            return { success: false, message: error instanceof Error ? error.message : String(error) };
        }
    }
    /**
     * Get batch processing progress (snapshot copy; percentage added when
     * total > 0).
     */
    getBatchProgress(): { inProgress: boolean; total: number; processed: number; percentage?: number; startTime?: Date } {
        const result: { inProgress: boolean; total: number; processed: number; percentage?: number; startTime?: Date } = { ...this.batchProcessingState };
        if (result.total > 0) {
            result.percentage = (result.processed / result.total) * 100;
        }
        return result;
    }
    /**
     * Process batch OCR in background with progress tracking.
     * Individual failures are logged and counted, not propagated; the loop
     * exits early if cancelBatchProcessing() clears inProgress.
     */
    private async processBatchInBackground(blobsToProcess: Array<{ blobId: string; mimeType: string; entityType: 'note' | 'attachment'; entityId: string }>): Promise<void> {
        try {
            log.info('Starting batch OCR processing...');
            for (const blobInfo of blobsToProcess) {
                if (!this.batchProcessingState.inProgress) {
                    break; // Stop if processing was cancelled
                }
                try {
                    if (blobInfo.entityType === 'note') {
                        await this.processNoteOCR(blobInfo.entityId);
                    } else {
                        await this.processAttachmentOCR(blobInfo.entityId);
                    }
                    this.batchProcessingState.processed++;
                    // Add small delay to prevent overwhelming the system
                    await new Promise(resolve => setTimeout(resolve, 500));
                } catch (error) {
                    log.error(`Failed to process OCR for ${blobInfo.entityType} ${blobInfo.entityId}: ${error}`);
                    this.batchProcessingState.processed++; // Count as processed even if failed
                }
            }
            // Mark as completed
            this.batchProcessingState.inProgress = false;
            log.info(`Batch OCR processing completed. Processed ${this.batchProcessingState.processed} files.`);
        } catch (error) {
            log.error(`Batch OCR processing failed: ${error}`);
            this.batchProcessingState.inProgress = false;
            throw error;
        }
    }
    /**
     * Cancel batch processing. The background loop notices the cleared flag
     * before the next item.
     */
    cancelBatchProcessing(): void {
        if (this.batchProcessingState.inProgress) {
            this.batchProcessingState.inProgress = false;
            log.info('Batch OCR processing cancelled');
        }
    }
    /**
     * Get processor for a given MIME type.
     * First registered processor that claims the type wins.
     */
    private getProcessorForMimeType(mimeType: string): FileProcessor | null {
        for (const processor of this.processors.values()) {
            if (processor.canProcess(mimeType)) {
                return processor;
            }
        }
        return null;
    }
    /**
     * Get all MIME types supported by all registered processors (deduplicated).
     */
    getAllSupportedMimeTypes(): string[] {
        const supportedTypes = new Set<string>();
        // Gather MIME types from all registered processors
        for (const processor of this.processors.values()) {
            const processorTypes = processor.getSupportedMimeTypes();
            processorTypes.forEach(type => supportedTypes.add(type));
        }
        return Array.from(supportedTypes);
    }
    /**
     * Check if a MIME type is supported by any processor.
     */
    isSupportedByAnyProcessor(mimeType: string): boolean {
        if (!mimeType) return false;
        // Check if any processor can handle this MIME type
        const processor = this.getProcessorForMimeType(mimeType);
        return processor !== null;
    }
    /**
     * Check if blob needs OCR re-processing due to content changes.
     * True when OCR never ran, or the blob was modified after the last run;
     * false on missing blob or query failure (fail-safe: no rework).
     */
    needsReprocessing(blobId: string): boolean {
        if (!blobId) {
            return false;
        }
        try {
            const blobInfo = sql.getRow<{
                utcDateModified: string;
                textExtractionLastProcessed: string | null;
            }>(`
                SELECT utcDateModified, textExtractionLastProcessed
                FROM blobs
                WHERE blobId = ?
            `, [blobId]);
            if (!blobInfo) {
                return false;
            }
            // If OCR was never processed, it needs processing
            if (!blobInfo.textExtractionLastProcessed) {
                return true;
            }
            // If blob was modified after last OCR processing, it needs re-processing
            const blobModified = new Date(blobInfo.utcDateModified);
            const lastOcrProcessed = new Date(blobInfo.textExtractionLastProcessed);
            return blobModified > lastOcrProcessed;
        } catch (error) {
            log.error(`Failed to check if blob ${blobId} needs reprocessing: ${error}`);
            return false;
        }
    }
    /**
     * Invalidate OCR results for a blob (clear textRepresentation and textExtractionLastProcessed).
     */
    invalidateOCRResult(blobId: string): void {
        if (!blobId) {
            return;
        }
        try {
            sql.execute(`
                UPDATE blobs SET
                    textRepresentation = NULL,
                    textExtractionLastProcessed = NULL
                WHERE blobId = ?
            `, [blobId]);
            log.info(`Invalidated OCR result for blob ${blobId}`);
        } catch (error) {
            log.error(`Failed to invalidate OCR result for blob ${blobId}: ${error}`);
            throw error;
        }
    }
    /**
     * Get blobs that need OCR processing (modified after last OCR or never processed).
     * The supported-MIME filtering is pushed into the SQL, so the result
     * needs no further filtering in JS.
     */
    getBlobsNeedingOCR(): Array<{ blobId: string; mimeType: string; entityType: 'note' | 'attachment'; entityId: string }> {
        try {
            // Get notes with blobs that need OCR (both image notes and file notes with supported MIME types)
            const noteBlobs = sql.getRows<{
                blobId: string;
                mimeType: string;
                entityId: string;
            }>(`
                SELECT n.blobId, n.mime as mimeType, n.noteId as entityId
                FROM notes n
                JOIN blobs b ON n.blobId = b.blobId
                WHERE (
                    n.type = 'image'
                    OR (
                        n.type = 'file'
                        AND n.mime IN (
                            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                            'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            'application/msword',
                            'application/vnd.ms-excel',
                            'application/vnd.ms-powerpoint',
                            'application/rtf',
                            'application/pdf',
                            'image/jpeg',
                            'image/jpg',
                            'image/png',
                            'image/gif',
                            'image/bmp',
                            'image/tiff',
                            'image/webp'
                        )
                    )
                )
                AND n.isDeleted = 0
                AND n.blobId IS NOT NULL
                AND (
                    b.textExtractionLastProcessed IS NULL
                    OR b.utcDateModified > b.textExtractionLastProcessed
                )
            `);
            // Get attachments with blobs that need OCR (both image and file attachments with supported MIME types)
            const attachmentBlobs = sql.getRows<{
                blobId: string;
                mimeType: string;
                entityId: string;
            }>(`
                SELECT a.blobId, a.mime as mimeType, a.attachmentId as entityId
                FROM attachments a
                JOIN blobs b ON a.blobId = b.blobId
                WHERE (
                    a.role = 'image'
                    OR (
                        a.role = 'file'
                        AND a.mime IN (
                            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                            'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                            'application/msword',
                            'application/vnd.ms-excel',
                            'application/vnd.ms-powerpoint',
                            'application/rtf',
                            'application/pdf',
                            'image/jpeg',
                            'image/jpg',
                            'image/png',
                            'image/gif',
                            'image/bmp',
                            'image/tiff',
                            'image/webp'
                        )
                    )
                )
                AND a.isDeleted = 0
                AND a.blobId IS NOT NULL
                AND (
                    b.textExtractionLastProcessed IS NULL
                    OR b.utcDateModified > b.textExtractionLastProcessed
                )
            `);
            // Combine results
            const result = [
                ...noteBlobs.map(blob => ({ ...blob, entityType: 'note' as const })),
                ...attachmentBlobs.map(blob => ({ ...blob, entityType: 'attachment' as const }))
            ];
            // Return all results (no need to filter by MIME type as we already did in the query)
            return result;
        } catch (error) {
            log.error(`Failed to get blobs needing OCR: ${error}`);
            return [];
        }
    }
    /**
     * Process OCR for all blobs that need it (auto-processing).
     * Sequential with a small delay per item; per-item failures are logged
     * and skipped so one bad blob doesn't halt the run.
     */
    async processAllBlobsNeedingOCR(): Promise<void> {
        if (!this.isOCREnabled()) {
            log.info('OCR is disabled, skipping auto-processing');
            return;
        }
        const blobsNeedingOCR = this.getBlobsNeedingOCR();
        if (blobsNeedingOCR.length === 0) {
            log.info('No blobs need OCR processing');
            return;
        }
        log.info(`Auto-processing OCR for ${blobsNeedingOCR.length} blobs...`);
        for (const blobInfo of blobsNeedingOCR) {
            try {
                if (blobInfo.entityType === 'note') {
                    await this.processNoteOCR(blobInfo.entityId);
                } else {
                    await this.processAttachmentOCR(blobInfo.entityId);
                }
                // Add small delay to prevent overwhelming the system
                await new Promise(resolve => setTimeout(resolve, 100));
            } catch (error) {
                log.error(`Failed to auto-process OCR for ${blobInfo.entityType} ${blobInfo.entityId}: ${error}`);
                // Continue with other blobs
            }
        }
        log.info('Auto-processing OCR completed');
    }
}
export default new OCRService();

View File

@@ -0,0 +1,33 @@
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
/**
* Base class for file processors that extract text from different file types
*/
/**
 * Common contract for text-extraction backends.
 *
 * Each concrete processor declares which MIME types it handles and knows
 * how to pull text out of a file buffer for those types.
 */
export abstract class FileProcessor {
    /** Whether this processor can handle files of the given MIME type. */
    abstract canProcess(mimeType: string): boolean;

    /** Extract text (and confidence metadata) from the given file buffer. */
    abstract extractText(buffer: Buffer, options: OCRProcessingOptions): Promise<OCRResult>;

    /** Short identifier for the kind of processing this backend performs. */
    abstract getProcessingType(): string;

    /** Every MIME type this processor accepts. */
    abstract getSupportedMimeTypes(): string[];

    /**
     * Release any resources held by the processor.
     * Default implementation owns nothing and resolves immediately;
     * subclasses override when they hold workers or similar resources.
     */
    async cleanup(): Promise<void> {
        // intentionally empty
    }
}

View File

@@ -0,0 +1,236 @@
import { createRequire } from 'node:module';

import Tesseract from 'tesseract.js';

import log from '../../log.js';
import options from '../../options.js';
import { OCRProcessingOptions, OCRResult } from '../ocr_service.js';
import { FileProcessor } from './file_processor.js';
/**
* Image processor for extracting text from image files using Tesseract
*/
export class ImageProcessor extends FileProcessor {
private worker: Tesseract.Worker | null = null;
private isInitialized = false;
private readonly supportedTypes = [
'image/jpeg',
'image/jpg',
'image/png',
'image/gif',
'image/bmp',
'image/tiff',
'image/webp'
];
canProcess(mimeType: string): boolean {
return this.supportedTypes.includes(mimeType.toLowerCase());
}
getSupportedMimeTypes(): string[] {
return [...this.supportedTypes];
}
async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
if (!this.isInitialized) {
await this.initialize();
}
if (!this.worker) {
throw new Error('Image processor worker not initialized');
}
try {
log.info('Starting image OCR text extraction...');
// Set language if specified and different from current
// Support multi-language format like 'ron+eng'
const language = options.language || this.getDefaultOCRLanguage();
// Validate language format
if (!this.isValidLanguageFormat(language)) {
throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
}
if (language !== 'eng') {
// For different languages, create a new worker
await this.worker.terminate();
log.info(`Initializing Tesseract worker for language(s): ${language}`);
this.worker = await Tesseract.createWorker(language, 1, {
logger: (m: { status: string; progress: number }) => {
if (m.status === 'recognizing text') {
log.info(`Image OCR progress (${language}): ${Math.round(m.progress * 100)}%`);
}
}
});
}
const result = await this.worker.recognize(buffer);
// Filter text based on minimum confidence threshold
const { filteredText, overallConfidence } = this.filterTextByConfidence(result.data, options);
const ocrResult: OCRResult = {
text: filteredText,
confidence: overallConfidence,
extractedAt: new Date().toISOString(),
language: options.language || this.getDefaultOCRLanguage(),
pageCount: 1
};
log.info(`Image OCR extraction completed. Confidence: ${ocrResult.confidence}%, Text length: ${ocrResult.text.length}`);
return ocrResult;
} catch (error) {
log.error(`Image OCR text extraction failed: ${error}`);
throw error;
}
}
getProcessingType(): string {
return 'image';
}
private async initialize(): Promise<void> {
if (this.isInitialized) {
return;
}
try {
log.info('Initializing image OCR processor with Tesseract.js...');
// Configure proper paths for Node.js environment
const tesseractDir = require.resolve('tesseract.js').replace('/src/index.js', '');
const workerPath = require.resolve('tesseract.js/src/worker-script/node/index.js');
const corePath = require.resolve('tesseract.js-core/tesseract-core.wasm.js');
log.info(`Using worker path: ${workerPath}`);
log.info(`Using core path: ${corePath}`);
this.worker = await Tesseract.createWorker(this.getDefaultOCRLanguage(), 1, {
workerPath,
corePath,
logger: (m: { status: string; progress: number }) => {
if (m.status === 'recognizing text') {
log.info(`Image OCR progress: ${Math.round(m.progress * 100)}%`);
}
}
});
this.isInitialized = true;
log.info('Image OCR processor initialized successfully');
} catch (error) {
log.error(`Failed to initialize image OCR processor: ${error}`);
throw error;
}
}
async cleanup(): Promise<void> {
if (this.worker) {
await this.worker.terminate();
this.worker = null;
}
this.isInitialized = false;
log.info('Image OCR processor cleaned up');
}
/**
* Get default OCR language from options
*/
private getDefaultOCRLanguage(): string {
try {
const ocrLanguage = options.getOption('ocrLanguage');
if (!ocrLanguage) {
throw new Error('OCR language not configured in user settings');
}
return ocrLanguage;
} catch (error) {
log.error(`Failed to get default OCR language: ${error}`);
throw new Error('OCR language must be configured in settings before processing');
}
}
/**
* Filter text based on minimum confidence threshold
*/
private filterTextByConfidence(data: any, options: OCRProcessingOptions): { filteredText: string; overallConfidence: number } {
const minConfidence = this.getMinConfidenceThreshold();
// If no minimum confidence set, return original text
if (minConfidence <= 0) {
return {
filteredText: data.text.trim(),
overallConfidence: data.confidence / 100
};
}
const filteredWords: string[] = [];
const validConfidences: number[] = [];
// Tesseract provides word-level data
if (data.words && Array.isArray(data.words)) {
for (const word of data.words) {
const wordConfidence = word.confidence / 100; // Convert to decimal
if (wordConfidence >= minConfidence) {
filteredWords.push(word.text);
validConfidences.push(wordConfidence);
}
}
} else {
// Fallback: if word-level data not available, use overall confidence
const overallConfidence = data.confidence / 100;
if (overallConfidence >= minConfidence) {
return {
filteredText: data.text.trim(),
overallConfidence
};
}
log.info(`Entire text filtered out due to low confidence ${overallConfidence} (below threshold ${minConfidence})`);
return {
filteredText: '',
overallConfidence
};
}
// Calculate average confidence of accepted words
const averageConfidence = validConfidences.length > 0
? validConfidences.reduce((sum, conf) => sum + conf, 0) / validConfidences.length
: 0;
const filteredText = filteredWords.join(' ').trim();
log.info(`Filtered OCR text: ${filteredWords.length} words kept out of ${data.words?.length || 0} total words (min confidence: ${minConfidence})`);
return {
filteredText,
overallConfidence: averageConfidence
};
}
/**
* Get minimum confidence threshold from options
*/
private getMinConfidenceThreshold(): number {
const minConfidence = options.getOption('ocrMinConfidence') ?? 0;
return parseFloat(minConfidence);
}
/**
* Validate OCR language format
* Supports single language (eng) or multi-language (ron+eng)
*/
private isValidLanguageFormat(language: string): boolean {
if (!language || typeof language !== 'string') {
return false;
}
// Split by '+' for multi-language format
const languages = language.split('+');
// Check each language code (should be 2-7 characters, alphanumeric with underscores)
const validLanguagePattern = /^[a-zA-Z]{2,3}(_[a-zA-Z]{2,3})?$/;
return languages.every(lang => {
const trimmed = lang.trim();
return trimmed.length > 0 && validLanguagePattern.test(trimmed);
});
}
}

View File

@@ -0,0 +1,133 @@
import * as officeParser from 'officeparser';
import log from '../../log.js';
import options from '../../options.js';
import { OCRProcessingOptions,OCRResult } from '../ocr_service.js';
import { FileProcessor } from './file_processor.js';
import { ImageProcessor } from './image_processor.js';
/**
* Office document processor for extracting text and images from DOCX/XLSX/PPTX files
*/
/**
 * Office document processor for extracting text and images from DOCX/XLSX/PPTX files.
 * Text is pulled directly from the document via officeparser; no OCR pass
 * is performed here.
 */
export class OfficeProcessor extends FileProcessor {
    private imageProcessor: ImageProcessor;

    private readonly supportedTypes = [
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document', // DOCX
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', // XLSX
        'application/vnd.openxmlformats-officedocument.presentationml.presentation', // PPTX
        'application/msword', // DOC
        'application/vnd.ms-excel', // XLS
        'application/vnd.ms-powerpoint', // PPT
        'application/rtf' // RTF
    ];

    constructor() {
        super();
        this.imageProcessor = new ImageProcessor();
    }

    canProcess(mimeType: string): boolean {
        return this.supportedTypes.includes(mimeType);
    }

    getSupportedMimeTypes(): string[] {
        return [...this.supportedTypes];
    }

    /**
     * Extract text from an Office document buffer.
     * @throws on invalid language format or parser failure
     */
    async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info('Starting Office document text extraction...');
            // Resolve and validate the language up front so a bad setting
            // fails fast, before parsing.
            const language = options.language || this.getDefaultOCRLanguage();
            if (!this.isValidLanguageFormat(language)) {
                throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
            }

            const parsed = await this.parseOfficeDocument(buffer);
            const extractedText = parsed.data?.trim().length ? parsed.data.trim() : '';
            // Direct extraction (no OCR) gets a fixed high confidence when
            // any text was found.
            const confidence = extractedText.length > 0 ? 0.99 : 0;

            const result: OCRResult = {
                text: extractedText,
                confidence,
                extractedAt: new Date().toISOString(),
                language,
                pageCount: 1 // Office documents are treated as single logical document
            };
            log.info(`Office document text extraction completed. Confidence: ${confidence}%, Text length: ${result.text.length}`);
            return result;
        } catch (error) {
            log.error(`Office document text extraction failed: ${error}`);
            throw error;
        }
    }

    /** Run officeparser over the buffer, normalizing a null result to ''. */
    private async parseOfficeDocument(buffer: Buffer): Promise<{ data: string }> {
        try {
            const text = await officeParser.parseOfficeAsync(buffer, {
                outputErrorToConsole: false,
                newlineDelimiter: '\n',
                ignoreNotes: false,
                putNotesAtLast: false
            });
            return { data: text || '' };
        } catch (error) {
            throw new Error(`Office document parsing failed: ${error}`);
        }
    }

    getProcessingType(): string {
        return 'office';
    }

    async cleanup(): Promise<void> {
        await this.imageProcessor.cleanup();
    }

    /**
     * Get default OCR language from options.
     * @throws when no language has been configured
     */
    private getDefaultOCRLanguage(): string {
        try {
            const configured = options.getOption('ocrLanguage');
            if (!configured) {
                throw new Error('OCR language not configured in user settings');
            }
            return configured;
        } catch (error) {
            log.error(`Failed to get default OCR language: ${error}`);
            throw new Error('OCR language must be configured in settings before processing');
        }
    }

    /**
     * Validate OCR language format.
     * Supports single language (eng) or multi-language (ron+eng).
     */
    private isValidLanguageFormat(language: string): boolean {
        if (!language || typeof language !== 'string') {
            return false;
        }
        // Each '+'-separated code must look like 'eng' or 'zh_CN'.
        const codePattern = /^[a-zA-Z]{2,3}(_[a-zA-Z]{2,3})?$/;
        return language
            .split('+')
            .every(code => {
                const candidate = code.trim();
                return candidate.length > 0 && codePattern.test(candidate);
            });
    }
}

View File

@@ -0,0 +1,147 @@
import * as pdfParse from 'pdf-parse';
import log from '../../log.js';
import options from '../../options.js';
import { OCRProcessingOptions,OCRResult } from '../ocr_service.js';
import { FileProcessor } from './file_processor.js';
import { ImageProcessor } from './image_processor.js';
/**
* PDF processor for extracting text from PDF files
* First tries to extract existing text, then falls back to OCR on images
*/
export class PDFProcessor extends FileProcessor {
private imageProcessor: ImageProcessor;
private readonly supportedTypes = ['application/pdf'];
constructor() {
super();
this.imageProcessor = new ImageProcessor();
}
canProcess(mimeType: string): boolean {
return mimeType.toLowerCase() === 'application/pdf';
}
getSupportedMimeTypes(): string[] {
return [...this.supportedTypes];
}
async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
try {
log.info('Starting PDF text extraction...');
// Validate language format
const language = options.language || this.getDefaultOCRLanguage();
if (!this.isValidLanguageFormat(language)) {
throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
}
// First try to extract existing text from PDF
if (options.enablePDFTextExtraction !== false) {
const textResult = await this.extractTextFromPDF(buffer, options);
if (textResult.text.trim().length > 0) {
log.info(`PDF text extraction successful. Length: ${textResult.text.length}`);
return textResult;
}
}
// Fall back to OCR if no text found or PDF text extraction is disabled
log.info('No text found in PDF or text extraction disabled, falling back to OCR...');
return await this.extractTextViaOCR(buffer, options);
} catch (error) {
log.error(`PDF text extraction failed: ${error}`);
throw error;
}
}
private async extractTextFromPDF(buffer: Buffer, options: OCRProcessingOptions): Promise<OCRResult> {
try {
const data = await pdfParse(buffer);
return {
text: data.text.trim(),
confidence: 0.99, // High confidence for direct text extraction
extractedAt: new Date().toISOString(),
language: options.language || this.getDefaultOCRLanguage(),
pageCount: data.numpages
};
} catch (error) {
log.error(`PDF text extraction failed: ${error}`);
throw error;
}
}
/**
 * OCR fallback for PDFs without a usable text layer.
 *
 * Converting PDF pages to images (e.g. via pdf2pic) is not wired up yet, so
 * this currently logs the limitation and returns a zero-confidence placeholder
 * result instead of real OCR output.
 */
private async extractTextViaOCR(buffer: Buffer, options: OCRProcessingOptions): Promise<OCRResult> {
    try {
        log.info('PDF to image conversion not fully implemented, returning placeholder');
        const placeholder: OCRResult = {
            text: '[PDF OCR not fully implemented - would convert PDF pages to images and OCR each page]',
            confidence: 0.0,
            extractedAt: new Date().toISOString(),
            language: options.language || this.getDefaultOCRLanguage(),
            pageCount: 1
        };
        return placeholder;
    } catch (error) {
        log.error(`PDF OCR extraction failed: ${error}`);
        throw error;
    }
}
/**
 * Identifier for this processor kind.
 */
getProcessingType(): string {
    return 'pdf';
}
/**
 * Releases resources held by the underlying image processor.
 */
async cleanup(): Promise<void> {
    await this.imageProcessor.cleanup();
}
/**
 * Get default OCR language from options.
 *
 * The original failure (missing option or options-service error) is logged;
 * callers always see the same stable, user-facing error message.
 *
 * @throws when no OCR language has been configured in user settings
 */
private getDefaultOCRLanguage(): string {
    try {
        const configured = options.getOption('ocrLanguage');
        if (configured) {
            return configured;
        }
        throw new Error('OCR language not configured in user settings');
    } catch (error) {
        log.error(`Failed to get default OCR language: ${error}`);
        throw new Error('OCR language must be configured in settings before processing');
    }
}
/**
 * Validate OCR language format.
 * Supports single language (eng) or multi-language (ron+eng).
 */
private isValidLanguageFormat(language: string): boolean {
    if (!language || typeof language !== 'string') {
        return false;
    }
    // Split by '+' for multi-language format
    const languages = language.split('+');
    // Each code must be 2-3 letters, optionally followed by an underscore and
    // another 2-3 letters (e.g. "eng", "chi_sim"); digits are not accepted.
    const validLanguagePattern = /^[a-zA-Z]{2,3}(_[a-zA-Z]{2,3})?$/;
    return languages.every(lang => {
        const trimmed = lang.trim();
        return trimmed.length > 0 && validLanguagePattern.test(trimmed);
    });
}
}

View File

@@ -0,0 +1,135 @@
import sharp from 'sharp';
import log from '../../log.js';
import options from '../../options.js';
import { OCRProcessingOptions,OCRResult } from '../ocr_service.js';
import { FileProcessor } from './file_processor.js';
import { ImageProcessor } from './image_processor.js';
/**
 * TIFF processor for extracting text from multi-page TIFF files.
 *
 * Each page is rendered to PNG via sharp and handed to the shared
 * ImageProcessor for OCR; page texts are concatenated with page separators.
 */
export class TIFFProcessor extends FileProcessor {
    // Shared OCR pipeline used for every rendered page.
    private imageProcessor: ImageProcessor;
    // MIME types accepted by canProcess()/getSupportedMimeTypes().
    private readonly supportedTypes = ['image/tiff', 'image/tif'];
    constructor() {
        super();
        this.imageProcessor = new ImageProcessor();
    }
    /** True when the MIME type is a TIFF variant (case-insensitive). */
    canProcess(mimeType: string): boolean {
        return mimeType.toLowerCase() === 'image/tiff' || mimeType.toLowerCase() === 'image/tif';
    }
    /** Returns a copy of the supported MIME types. */
    getSupportedMimeTypes(): string[] {
        return [...this.supportedTypes];
    }
    /**
     * Extracts text from a (possibly multi-page) TIFF by OCR-ing each page.
     *
     * A page that fails is logged and skipped so remaining pages still
     * contribute; page texts are joined with "--- Page N ---" separators.
     *
     * @param buffer raw TIFF file contents
     * @param options OCR options; `options.language` overrides the configured default
     * @returns combined text, averaged confidence and page count
     * @throws if the OCR language is missing/invalid or sharp cannot read the buffer
     */
    async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info('Starting TIFF text extraction...');
            // Validate language format
            const language = options.language || this.getDefaultOCRLanguage();
            if (!this.isValidLanguageFormat(language)) {
                throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
            }
            // Check if this is a multi-page TIFF (sharp reports pages for multi-frame formats)
            const metadata = await sharp(buffer).metadata();
            const pageCount = metadata.pages || 1;
            let combinedText = '';
            let totalConfidence = 0;
            // Process each page
            for (let page = 0; page < pageCount; page++) {
                try {
                    log.info(`Processing TIFF page ${page + 1}/${pageCount}...`);
                    // Extract page as PNG buffer
                    const pageBuffer = await sharp(buffer, { page })
                        .png()
                        .toBuffer();
                    // OCR the page
                    const pageResult = await this.imageProcessor.extractText(pageBuffer, options);
                    if (pageResult.text.trim().length > 0) {
                        if (combinedText.length > 0) {
                            combinedText += `\n\n--- Page ${page + 1} ---\n`;
                        }
                        combinedText += pageResult.text;
                        totalConfidence += pageResult.confidence;
                    }
                } catch (error) {
                    log.error(`Failed to process TIFF page ${page + 1}: ${error}`);
                    // Continue with other pages
                }
            }
            // NOTE(review): averages over ALL pages, so empty or failed pages pull
            // the confidence toward zero — confirm this weighting is intended.
            const averageConfidence = pageCount > 0 ? totalConfidence / pageCount : 0;
            const result: OCRResult = {
                text: combinedText.trim(),
                confidence: averageConfidence,
                extractedAt: new Date().toISOString(),
                language: options.language || this.getDefaultOCRLanguage(),
                pageCount
            };
            // NOTE(review): the "%" suffix looks misleading if confidence is on a
            // 0-1 scale (cf. the 0.55 ocrMinConfidence default) — verify the unit.
            log.info(`TIFF text extraction completed. Pages: ${pageCount}, Confidence: ${averageConfidence}%, Text length: ${result.text.length}`);
            return result;
        } catch (error) {
            log.error(`TIFF text extraction failed: ${error}`);
            throw error;
        }
    }
    /** Identifier of this processor kind. */
    getProcessingType(): string {
        return 'tiff';
    }
    /** Releases resources held by the underlying image processor. */
    async cleanup(): Promise<void> {
        await this.imageProcessor.cleanup();
    }
    /**
     * Get default OCR language from options.
     *
     * @throws when no OCR language has been configured in user settings
     */
    private getDefaultOCRLanguage(): string {
        try {
            const ocrLanguage = options.getOption('ocrLanguage');
            if (!ocrLanguage) {
                throw new Error('OCR language not configured in user settings');
            }
            return ocrLanguage;
        } catch (error) {
            log.error(`Failed to get default OCR language: ${error}`);
            // Re-thrown with a stable message; the original cause is logged above.
            throw new Error('OCR language must be configured in settings before processing');
        }
    }
    /**
     * Validate OCR language format.
     * Supports single language (eng) or multi-language (ron+eng).
     */
    private isValidLanguageFormat(language: string): boolean {
        if (!language || typeof language !== 'string') {
            return false;
        }
        // Split by '+' for multi-language format
        const languages = language.split('+');
        // Each code must be 2-3 letters, optionally followed by an underscore and
        // another 2-3 letters (e.g. "eng", "chi_sim"); digits are not accepted.
        const validLanguagePattern = /^[a-zA-Z]{2,3}(_[a-zA-Z]{2,3})?$/;
        return languages.every(lang => {
            const trimmed = lang.trim();
            return trimmed.length > 0 && validLanguagePattern.test(trimmed);
        });
    }
}

View File

@@ -212,7 +212,13 @@ const defaultOptions: DefaultOption[] = [
{ name: "experimentalFeatures", value: "[]", isSynced: true },
// AI / LLM
{ name: "llmProviders", value: "[]", isSynced: false }
{ name: "llmProviders", value: "[]", isSynced: false },
// OCR options
{ name: "ocrEnabled", value: "false", isSynced: true },
{ name: "ocrLanguage", value: "eng", isSynced: true },
{ name: "ocrAutoProcessImages", value: "true", isSynced: true },
{ name: "ocrMinConfidence", value: "0.55", isSynced: true },
];
/**

View File

@@ -0,0 +1,111 @@
import becca from "../../../becca/becca.js";
import optionService from "../../options.js";
import sql from "../../sql.js";
import Expression from "./expression.js";
import NoteSet from "../note_set.js";
import SearchContext from "../search_context.js";
/**
* Search expression for finding text within OCR-extracted content from images
*/
export default class OCRContentExpression extends Expression {
private searchText: string;
constructor(searchText: string) {
super();
this.searchText = searchText;
}
execute(inputNoteSet: NoteSet, executionContext: object, searchContext: SearchContext): NoteSet {
// Don't search OCR content if it's not enabled
if (!this.isOCRSearchEnabled()) {
return new NoteSet();
}
const resultNoteSet = new NoteSet();
const ocrResults = this.searchOCRContent(this.searchText);
for (const ocrResult of ocrResults) {
// Find notes that use this blob
const notes = sql.getRows<{noteId: string}>(`
SELECT noteId FROM notes
WHERE blobId = ? AND isDeleted = 0
`, [ocrResult.blobId]);
for (const noteRow of notes) {
const note = becca.getNote(noteRow.noteId);
if (note && !note.isDeleted && inputNoteSet.hasNoteId(note.noteId)) {
resultNoteSet.add(note);
}
}
// Find attachments that use this blob and their parent notes
const attachments = sql.getRows<{ownerId: string}>(`
SELECT ownerId FROM attachments
WHERE blobId = ? AND isDeleted = 0
`, [ocrResult.blobId]);
for (const attachmentRow of attachments) {
const note = becca.getNote(attachmentRow.ownerId);
if (note && !note.isDeleted && inputNoteSet.hasNoteId(note.noteId)) {
resultNoteSet.add(note);
}
}
}
// Add highlight tokens for OCR matches
if (ocrResults.length > 0) {
const tokens = this.extractHighlightTokens(this.searchText);
searchContext.highlightedTokens.push(...tokens);
}
return resultNoteSet;
}
private isOCRSearchEnabled(): boolean {
try {
const optionService = require('../../options.js').default;
return optionService.getOptionBool('ocrEnabled');
} catch {
return false;
}
}
private searchOCRContent(searchText: string): Array<{
blobId: string;
textRepresentation: string;
}> {
try {
// Search in blobs table for OCR text
const query = `
SELECT blobId, textRepresentation
FROM blobs
WHERE textRepresentation LIKE ?
AND textRepresentation IS NOT NULL
AND textRepresentation != ''
LIMIT 50
`;
const params = [`%${searchText}%`];
return sql.getRows<{
blobId: string;
textRepresentation: string;
}>(query, params);
} catch (error) {
console.error('Error searching OCR content:', error);
return [];
}
}
private extractHighlightTokens(searchText: string): string[] {
// Split search text into words and return them as highlight tokens
return searchText
.split(/\s+/)
.filter(token => token.length > 2)
.map(token => token.toLowerCase());
}
toString(): string {
return `OCRContent('${this.searchText}')`;
}
}

View File

@@ -1,12 +1,11 @@
"use strict";
import beccaService from "../../becca/becca_service.js";
import becca from "../../becca/becca.js";
import {
normalizeSearchText,
calculateOptimizedEditDistance,
FUZZY_SEARCH_CONFIG
} from "./utils/text_utils.js";
import beccaService from "../../becca/becca_service.js";
import options from "../options.js";
import sql from "../sql.js";
import {
calculateOptimizedEditDistance,
FUZZY_SEARCH_CONFIG,
normalizeSearchText} from "./utils/text_utils.js";
// Scoring constants for better maintainability
const SCORE_WEIGHTS = {
@@ -85,6 +84,9 @@ class SearchResult {
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching);
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching);
// Add OCR scoring - weight between title and content matches
this.addOCRScore(tokens, 1.5);
if (note.isInHiddenSubtree()) {
this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY;
}
@@ -98,7 +100,7 @@ class SearchResult {
for (const chunk of chunks) {
for (const token of tokens) {
const normalizedToken = normalizeSearchText(token.toLowerCase());
if (chunk === normalizedToken) {
tokenScore += SCORE_WEIGHTS.TOKEN_EXACT_MATCH * token.length * factor;
} else if (chunk.startsWith(normalizedToken)) {
@@ -108,10 +110,10 @@ class SearchResult {
} else {
// Try fuzzy matching for individual tokens with caps applied
const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
// Apply caps: limit token length multiplier and per-token contribution
const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
@@ -119,7 +121,7 @@ class SearchResult {
fuzzyWeight * cappedTokenLength * factor,
SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
);
tokenScore += fuzzyTokenScore;
this.fuzzyScore += fuzzyTokenScore;
}
@@ -129,13 +131,43 @@ class SearchResult {
this.score += tokenScore;
}
/**
 * Adds OCR-derived text matches to this result's score.
 *
 * Reads `blobs.textRepresentation` for the note's own blob and for blobs of
 * its non-deleted attachments, then scores the tokens against each text with
 * the given weight factor.
 *
 * NOTE(review): the stored OCR confidence is not consulted here — every match
 * is weighted only by `factor`. Confirm whether confidence weighting is wanted
 * (the spec file appears to expect it).
 */
addOCRScore(tokens: string[], factor: number) {
    try {
        // Check if OCR is enabled
        if (!options.getOptionBool('ocrEnabled')) {
            return;
        }
        // Search for OCR results for this note and its attachments
        const ocrResults = sql.getRows(`
            SELECT b.textRepresentation
            FROM blobs b
            WHERE b.textRepresentation IS NOT NULL
            AND b.textRepresentation != ''
            AND (
                b.blobId = (SELECT blobId FROM notes WHERE noteId = ? AND isDeleted = 0)
                OR b.blobId IN (
                    SELECT blobId FROM attachments WHERE ownerId = ? AND isDeleted = 0
                )
            )
        `, [this.noteId, this.noteId]);
        for (const ocrResult of ocrResults as Array<{textRepresentation: string}>) {
            // Add score for OCR text matches
            this.addScoreForStrings(tokens, ocrResult.textRepresentation, factor);
        }
    } catch (error) {
        // Silently fail if OCR service is not available
        // NOTE(review): uses console.debug rather than the project logger — confirm intent.
        console.debug('OCR scoring failed:', error);
    }
}
/**
* Checks if the query matches as a complete word in the text
*/
private isWordMatch(text: string, query: string): boolean {
return text.includes(` ${query} `) ||
text.startsWith(`${query} `) ||
return text.includes(` ${query} `) ||
text.startsWith(`${query} `) ||
text.endsWith(` ${query}`);
}
@@ -147,21 +179,21 @@ class SearchResult {
if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
return 0;
}
const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
const maxLen = Math.max(title.length, query.length);
// Only apply fuzzy matching if the query is reasonably long and edit distance is small
if (query.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
if (query.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
editDistance / maxLen <= 0.3) {
const similarity = 1 - (editDistance / maxLen);
const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
// Apply cap to ensure fuzzy title matches don't exceed reasonable bounds
return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3);
}
return 0;
}

View File

@@ -0,0 +1,337 @@
// Unit tests for SearchResult scoring, including the OCR scoring integration.
//
// NOTE(review): the OCR fixtures below use rows shaped { extracted_text, confidence }
// and expect a query containing "FROM ocr_results", while the implementation under
// test reads `blobs.textRepresentation`. These expectations look out of date with
// the schema rename — re-verify them against search_result.ts.
import { describe, it, expect, vi, beforeEach } from 'vitest';

// Mock dependencies
const mockSql = {
    getRows: vi.fn()
};
const mockOptions = {
    getOptionBool: vi.fn()
};
const mockBecca = {
    notes: {},
    getNote: vi.fn()
};
const mockBeccaService = {
    getNoteTitleForPath: vi.fn()
};
vi.mock('../sql.js', () => ({
    default: mockSql
}));
vi.mock('../options.js', () => ({
    default: mockOptions
}));
// The SearchResult now uses proper ES imports which are mocked above
vi.mock('../../becca/becca.js', () => ({
    default: mockBecca
}));
vi.mock('../../becca/becca_service.js', () => ({
    default: mockBeccaService
}));
// Import SearchResult after mocking
let SearchResult: any;
beforeEach(async () => {
    vi.clearAllMocks();
    // Reset mock implementations
    mockOptions.getOptionBool.mockReturnValue(true);
    mockSql.getRows.mockReturnValue([]);
    mockBeccaService.getNoteTitleForPath.mockReturnValue('Test Note Title');
    // Setup mock note
    const mockNote = {
        noteId: 'test123',
        title: 'Test Note',
        isInHiddenSubtree: vi.fn().mockReturnValue(false)
    };
    mockBecca.notes['test123'] = mockNote;
    // Dynamically import SearchResult (must happen after vi.mock registration)
    const module = await import('./search_result.js');
    SearchResult = module.default;
});
describe('SearchResult', () => {
    describe('constructor', () => {
        it('should initialize with note path array', () => {
            const searchResult = new SearchResult(['root', 'folder', 'test123']);
            expect(searchResult.notePathArray).toEqual(['root', 'folder', 'test123']);
            expect(searchResult.noteId).toBe('test123');
            expect(searchResult.notePath).toBe('root/folder/test123');
            expect(searchResult.score).toBe(0);
            expect(mockBeccaService.getNoteTitleForPath).toHaveBeenCalledWith(['root', 'folder', 'test123']);
        });
    });
    describe('computeScore', () => {
        let searchResult: any;
        beforeEach(() => {
            searchResult = new SearchResult(['root', 'test123']);
        });
        describe('basic scoring', () => {
            it('should give highest score for exact note ID match', () => {
                searchResult.computeScore('test123', ['test123']);
                expect(searchResult.score).toBeGreaterThanOrEqual(1000);
            });
            it('should give high score for exact title match', () => {
                searchResult.computeScore('test note', ['test', 'note']);
                expect(searchResult.score).toBeGreaterThan(2000);
            });
            it('should give medium score for title prefix match', () => {
                searchResult.computeScore('test', ['test']);
                expect(searchResult.score).toBeGreaterThan(500);
            });
            it('should give lower score for title word match', () => {
                mockBecca.notes['test123'].title = 'This is a test note';
                searchResult.computeScore('test', ['test']);
                expect(searchResult.score).toBeGreaterThan(300);
            });
        });
        describe('OCR scoring integration', () => {
            beforeEach(() => {
                // Mock OCR-enabled
                mockOptions.getOptionBool.mockReturnValue(true);
            });
            it('should add OCR score when OCR results exist', () => {
                // NOTE(review): field names and table don't match the current schema (see header).
                const mockOCRResults = [
                    {
                        extracted_text: 'sample text from image',
                        confidence: 0.95
                    }
                ];
                mockSql.getRows.mockReturnValue(mockOCRResults);
                searchResult.computeScore('sample', ['sample']);
                expect(mockSql.getRows).toHaveBeenCalledWith(
                    expect.stringContaining('FROM ocr_results'),
                    ['test123', 'test123']
                );
                expect(searchResult.score).toBeGreaterThan(0);
            });
            it('should apply confidence weighting to OCR scores', () => {
                // NOTE(review): addOCRScore does not read `confidence` — this test's
                // premise may no longer hold against the current implementation.
                const highConfidenceResult = [
                    {
                        extracted_text: 'sample text',
                        confidence: 0.95
                    }
                ];
                const lowConfidenceResult = [
                    {
                        extracted_text: 'sample text',
                        confidence: 0.30
                    }
                ];
                // Test high confidence
                mockSql.getRows.mockReturnValue(highConfidenceResult);
                searchResult.computeScore('sample', ['sample']);
                const highConfidenceScore = searchResult.score;
                // Reset and test low confidence
                searchResult.score = 0;
                mockSql.getRows.mockReturnValue(lowConfidenceResult);
                searchResult.computeScore('sample', ['sample']);
                const lowConfidenceScore = searchResult.score;
                expect(highConfidenceScore).toBeGreaterThan(lowConfidenceScore);
            });
            it('should handle multiple OCR results', () => {
                const multipleResults = [
                    {
                        extracted_text: 'first sample text',
                        confidence: 0.90
                    },
                    {
                        extracted_text: 'second sample document',
                        confidence: 0.85
                    }
                ];
                mockSql.getRows.mockReturnValue(multipleResults);
                searchResult.computeScore('sample', ['sample']);
                expect(searchResult.score).toBeGreaterThan(0);
                // Score should account for multiple matches
            });
            it('should skip OCR scoring when OCR is disabled', () => {
                mockOptions.getOptionBool.mockReturnValue(false);
                searchResult.computeScore('sample', ['sample']);
                expect(mockSql.getRows).not.toHaveBeenCalled();
            });
            it('should handle OCR scoring errors gracefully', () => {
                mockSql.getRows.mockImplementation(() => {
                    throw new Error('Database error');
                });
                expect(() => {
                    searchResult.computeScore('sample', ['sample']);
                }).not.toThrow();
                // Score should still be calculated from other factors
                expect(searchResult.score).toBeGreaterThanOrEqual(0);
            });
        });
        describe('hidden notes penalty', () => {
            it('should apply penalty for hidden notes', () => {
                mockBecca.notes['test123'].isInHiddenSubtree.mockReturnValue(true);
                searchResult.computeScore('test', ['test']);
                const hiddenScore = searchResult.score;
                // Reset and test non-hidden
                mockBecca.notes['test123'].isInHiddenSubtree.mockReturnValue(false);
                searchResult.score = 0;
                searchResult.computeScore('test', ['test']);
                const normalScore = searchResult.score;
                expect(normalScore).toBeGreaterThan(hiddenScore);
                // Ties the penalty to SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY === 3.
                expect(hiddenScore).toBe(normalScore / 3);
            });
        });
    });
    describe('addScoreForStrings', () => {
        let searchResult: any;
        beforeEach(() => {
            searchResult = new SearchResult(['root', 'test123']);
        });
        it('should give highest score for exact token match', () => {
            searchResult.addScoreForStrings(['sample'], 'sample text', 1.0);
            const exactScore = searchResult.score;
            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'sampling text', 1.0);
            const prefixScore = searchResult.score;
            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'text sample text', 1.0);
            const partialScore = searchResult.score;
            expect(exactScore).toBeGreaterThan(prefixScore);
            expect(exactScore).toBeGreaterThanOrEqual(partialScore);
        });
        it('should apply factor multiplier correctly', () => {
            searchResult.addScoreForStrings(['sample'], 'sample text', 2.0);
            const doubleFactorScore = searchResult.score;
            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'sample text', 1.0);
            const singleFactorScore = searchResult.score;
            expect(doubleFactorScore).toBe(singleFactorScore * 2);
        });
        it('should handle multiple tokens', () => {
            searchResult.addScoreForStrings(['hello', 'world'], 'hello world test', 1.0);
            expect(searchResult.score).toBeGreaterThan(0);
        });
        it('should be case insensitive', () => {
            searchResult.addScoreForStrings(['sample'], 'sample text', 1.0);
            const lowerCaseScore = searchResult.score;
            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'SAMPLE text', 1.0);
            const upperCaseScore = searchResult.score;
            expect(upperCaseScore).toEqual(lowerCaseScore);
            expect(upperCaseScore).toBeGreaterThan(0);
        });
    });
    describe('addOCRScore', () => {
        let searchResult: any;
        beforeEach(() => {
            searchResult = new SearchResult(['root', 'test123']);
        });
        it('should query for both note and attachment OCR results', () => {
            mockOptions.getOptionBool.mockReturnValue(true);
            mockSql.getRows.mockReturnValue([]);
            searchResult.addOCRScore(['sample'], 1.5);
            // NOTE(review): table name expectation predates the blobs/textRepresentation rename.
            expect(mockSql.getRows).toHaveBeenCalledWith(
                expect.stringContaining('FROM ocr_results'),
                ['test123', 'test123']
            );
        });
        it('should apply minimum confidence multiplier', () => {
            mockOptions.getOptionBool.mockReturnValue(true);
            const lowConfidenceResult = [
                {
                    extracted_text: 'sample text',
                    confidence: 0.1 // Very low confidence
                }
            ];
            mockSql.getRows.mockReturnValue(lowConfidenceResult);
            searchResult.addOCRScore(['sample'], 1.0);
            // Should still get some score due to minimum 0.5x multiplier
            expect(searchResult.score).toBeGreaterThan(0);
        });
        it('should handle database query errors', () => {
            mockOptions.getOptionBool.mockReturnValue(true);
            mockSql.getRows.mockImplementation(() => {
                throw new Error('Database connection failed');
            });
            // Should not throw error
            expect(() => {
                searchResult.addOCRScore(['sample'], 1.5);
            }).not.toThrow();
        });
        it('should skip when OCR is disabled', () => {
            mockOptions.getOptionBool.mockReturnValue(false);
            searchResult.addOCRScore(['sample'], 1.5);
            expect(mockSql.getRows).not.toHaveBeenCalled();
        });
        it('should handle options service errors', () => {
            mockOptions.getOptionBool.mockImplementation(() => {
                throw new Error('Options service unavailable');
            });
            expect(() => {
                searchResult.addOCRScore(['sample'], 1.5);
            }).not.toThrow();
            expect(mockSql.getRows).not.toHaveBeenCalled();
        });
    });
});

View File

@@ -1,28 +1,30 @@
"use strict";
import { dayjs } from "@triliumnext/commons";
import { removeDiacritic } from "../../utils.js";
import AncestorExp from "../expressions/ancestor.js";
import AndExp from "../expressions/and.js";
import OrExp from "../expressions/or.js";
import NotExp from "../expressions/not.js";
import AttributeExistsExp from "../expressions/attribute_exists.js";
import ChildOfExp from "../expressions/child_of.js";
import DescendantOfExp from "../expressions/descendant_of.js";
import ParentOfExp from "../expressions/parent_of.js";
import RelationWhereExp from "../expressions/relation_where.js";
import PropertyComparisonExp from "../expressions/property_comparison.js";
import AttributeExistsExp from "../expressions/attribute_exists.js";
import LabelComparisonExp from "../expressions/label_comparison.js";
import NoteFlatTextExp from "../expressions/note_flat_text.js";
import NoteContentFulltextExp from "../expressions/note_content_fulltext.js";
import OrderByAndLimitExp from "../expressions/order_by_and_limit.js";
import AncestorExp from "../expressions/ancestor.js";
import buildComparator from "./build_comparator.js";
import ValueExtractor from "../value_extractor.js";
import { removeDiacritic } from "../../utils.js";
import TrueExp from "../expressions/true.js";
import IsHiddenExp from "../expressions/is_hidden.js";
import type SearchContext from "../search_context.js";
import type { TokenData, TokenStructure } from "./types.js";
import type Expression from "../expressions/expression.js";
import IsHiddenExp from "../expressions/is_hidden.js";
import LabelComparisonExp from "../expressions/label_comparison.js";
import NotExp from "../expressions/not.js";
import NoteContentFulltextExp from "../expressions/note_content_fulltext.js";
import NoteFlatTextExp from "../expressions/note_flat_text.js";
import OCRContentExpression from "../expressions/ocr_content.js";
import OrExp from "../expressions/or.js";
import OrderByAndLimitExp from "../expressions/order_by_and_limit.js";
import ParentOfExp from "../expressions/parent_of.js";
import PropertyComparisonExp from "../expressions/property_comparison.js";
import RelationWhereExp from "../expressions/relation_where.js";
import TrueExp from "../expressions/true.js";
import type SearchContext from "../search_context.js";
import ValueExtractor from "../value_extractor.js";
import buildComparator from "./build_comparator.js";
import type { TokenData, TokenStructure } from "./types.js";
function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leadingOperator?: string) {
const tokens: string[] = _tokens.map((t) => removeDiacritic(t.token));
@@ -42,16 +44,33 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading
// Exact match on title OR exact match on content OR exact match in flat text (includes attributes)
// For multi-word, join tokens with space to form exact phrase
const titleSearchValue = tokens.join(" ");
return new OrExp([
const exactMatchExpressions: Expression[] = [
new PropertyComparisonExp(searchContext, "title", "=", titleSearchValue),
new NoteContentFulltextExp("=", { tokens, flatText: false }),
new NoteContentFulltextExp("=", { tokens, flatText: true })
]);
];
// Add OCR content search for each token
for (const token of tokens) {
exactMatchExpressions.push(new OCRContentExpression(token));
}
return new OrExp(exactMatchExpressions);
}
return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]);
} else {
return new NoteFlatTextExp(tokens);
const searchExpressions: Expression[] = [
new NoteFlatTextExp(tokens),
new NoteContentFulltextExp(operator, { tokens, flatText: true })
];
// Add OCR content search for each token
for (const token of tokens) {
searchExpressions.push(new OCRContentExpression(token));
}
return new OrExp(searchExpressions);
}
return new NoteFlatTextExp(tokens);
}
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%=", "~=", "~*"]);
@@ -298,9 +317,9 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
searchContext.addError(`Relation can be compared only with property, e.g. ~relation.title=hello in ${context(i)}`);
return null;
} else {
return new AttributeExistsExp("relation", relationName, searchContext.fuzzyAttributeSearch);
}
return new AttributeExistsExp("relation", relationName, searchContext.fuzzyAttributeSearch);
}
function parseOrderByAndLimit() {
@@ -308,7 +327,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
valueExtractor: ValueExtractor;
direction: string;
}[] = [];
let limit: number | undefined = undefined;
let limit: number | undefined;
if (tokens[i].token === "orderby") {
do {
@@ -354,9 +373,9 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
return AndExp.of(expressions);
} else if (op === "or") {
return OrExp.of(expressions);
} else {
throw new Error(`Unrecognized op=${op}`);
}
throw new Error(`Unrecognized op=${op}`);
}
for (i = 0; i < tokens.length; i++) {
@@ -423,7 +442,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
} else if (op !== token) {
searchContext.addError("Mixed usage of AND/OR - always use parenthesis to group AND/OR expressions.");
}
} else if (isOperator({ token: token })) {
} else if (isOperator({ token })) {
searchContext.addError(`Misplaced or incomplete expression "${token}"`);
} else {
searchContext.addError(`Unrecognized expression "${token}"`);
@@ -493,9 +512,9 @@ function getAncestorExp({ ancestorNoteId, ancestorDepth, includeHiddenNotes }: S
return new AncestorExp(ancestorNoteId, ancestorDepth);
} else if (!includeHiddenNotes) {
return new NotExp(new IsHiddenExp());
} else {
return null;
}
return null;
}
export default parse;

View File

@@ -9,7 +9,7 @@
"preview": "pnpm build && vite preview"
},
"dependencies": {
"i18next": "25.10.10",
"i18next": "26.0.1",
"i18next-http-backend": "3.0.2",
"preact": "10.29.0",
"preact-iso": "2.11.1",

View File

@@ -27,8 +27,7 @@ export function initTranslations(lng: string) {
initAsync: false,
react: {
useSuspense: false
},
showSupportNotice: false
}
});
}

BIN
eng.traineddata Normal file

Binary file not shown.

View File

@@ -36,7 +36,7 @@
"test:all": "pnpm test:parallel && pnpm test:sequential",
"test:parallel": "pnpm --filter=!server --filter=!ckeditor5-mermaid --filter=!ckeditor5-math --parallel test",
"test:sequential": "pnpm --filter=server --filter=ckeditor5-mermaid --filter=ckeditor5-math --sequential test",
"typecheck": "tsc --build",
"typecheck": "tsx scripts/filter-tsc-output.mts",
"dev:format-check": "eslint -c eslint.format.config.mjs .",
"dev:format-fix": "eslint -c eslint.format.config.mjs . --fix",
"dev:linter-check": "cross-env NODE_OPTIONS=--max_old_space_size=4096 eslint .",

View File

@@ -144,6 +144,12 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi
// AI / LLM
/** JSON array of configured LLM providers with their API keys */
llmProviders: string;
// OCR options
ocrEnabled: boolean;
ocrLanguage: string;
ocrAutoProcessImages: boolean;
ocrMinConfidence: string;
}
export type OptionNames = keyof OptionDefinitions;

View File

@@ -72,6 +72,7 @@ export interface BlobRow {
blobId: string;
content: string | Buffer;
contentLength: number;
textRepresentation?: string | null;
dateModified: string;
utcDateModified: string;
}

623
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,56 @@
/**
 * Runs `tsc --build` and filters out noisy cascade errors (TS6305).
 * Numbers each remaining error and prints a summary at the end.
 */
import { execSync } from "child_process";

const SUPPRESSED_CODES = [ "TS6305" ];
const ERROR_LINE_PATTERN = /^.+\(\d+,\d+\): error TS\d+:/;

/** Invokes the compiler and returns its combined diagnostic output. */
function runTsc(): string {
    try {
        return execSync("tsc --build", {
            encoding: "utf-8",
            stdio: [ "inherit", "pipe", "pipe" ]
        });
    } catch (err: unknown) {
        // tsc exits non-zero when diagnostics exist; they are attached to the
        // thrown error's stdout/stderr.
        const execErr = err as { stdout?: string; stderr?: string };
        return (execErr.stdout ?? "") + (execErr.stderr ?? "");
    }
}

const relevantLines = runTsc()
    .split(/\r?\n/)
    .filter((line) => !SUPPRESSED_CODES.some((code) => line.includes(code)));

const reported: string[] = [];
const alreadySeen = new Set<string>();
let errorCount = 0;
let droppingContinuation = false;

for (const line of relevantLines) {
    if (ERROR_LINE_PATTERN.test(line)) {
        // Duplicate diagnostic: drop it together with its context lines.
        if (alreadySeen.has(line)) {
            droppingContinuation = true;
            continue;
        }
        alreadySeen.add(line);
        droppingContinuation = false;
        errorCount++;
        reported.push(`[${errorCount}] ${line}`);
    } else if (line.trim()) {
        // Indented continuation context for the preceding multi-line error.
        if (!droppingContinuation) {
            reported.push(line);
        }
    }
}

if (errorCount > 0) {
    console.log(reported.join("\n"));
    console.log(`\n${errorCount} error(s) found.`);
    process.exit(1);
} else {
    console.log("No errors found.");
}