Compare commits

...

36 Commits

Author SHA1 Message Date
Elian Doran
b9cef158d8 Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabilities 2025-07-31 08:25:30 +03:00
Elian Doran
5ec6141369 feat(ocr): filter out text based on confidence 2025-07-26 14:57:12 +03:00
Elian Doran
55ac1e01f2 chore(ocr): improve ocr search result style 2025-07-26 14:15:45 +03:00
Elian Doran
65b58c3668 feat(ocr): auto-process images only if enabled in settings 2025-07-26 14:12:22 +03:00
Elian Doran
2cb4e5e8dc feat(ocr): run the image operation in the background 2025-07-26 14:07:23 +03:00
Elian Doran
72cea245f1 feat(ocr): automatically process images 2025-07-26 14:00:35 +03:00
Elian Doran
08ca86c68a chore(deps): move workspace dependencies to server 2025-07-26 13:48:28 +03:00
Elian Doran
925c9c1e7b feat(ocr): display OCR text only in search results 2025-07-26 12:55:52 +03:00
Elian Doran
6212ea0304 feat(ocr): display OCR text in search results 2025-07-26 12:41:30 +03:00
Elian Doran
f295592134 fix(ocr): search error due to scoring 2025-07-26 12:33:45 +03:00
Elian Doran
69b0973e6d feat(ocr): add a button to trigger an OCR manually 2025-07-26 12:18:20 +03:00
Elian Doran
422d318dac feat(ocr): add an option to display OCR text 2025-07-26 12:08:04 +03:00
Elian Doran
c55aa6ee88 refactor(ocr): unnecessary initialization logic 2025-07-26 11:56:48 +03:00
Elian Doran
090b175152 refactor(ocr): deduplicate mime types partially 2025-07-26 11:51:53 +03:00
Elian Doran
11e9b097a2 feat(ocr): basic processing of new files 2025-07-26 11:46:28 +03:00
Elian Doran
2adfc1d32b chore(ci): remove unnecessary change 2025-07-26 11:24:42 +03:00
Elian Doran
99fa5d89e7 Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabilities 2025-07-26 10:33:01 +03:00
perf3ct
ca8cbf8ccf feat(ocr): add additional processors for OCR feature 2025-07-16 20:10:56 +00:00
perf3ct
6722d2d266 feat(ocr): implement new language selection form 2025-07-16 20:10:41 +00:00
perf3ct
508cbeaa1b feat(ocr): update this new migration to also add a ocr_last_processed column 2025-07-16 20:10:07 +00:00
perf3ct
e040865905 feat(ocr): add officeparser, pdf-parse, and sharp dependencies for ocr 2025-07-16 20:09:41 +00:00
perf3ct
a7878dd2c6 Merge branch 'main' into feat/add-ocr-capabilities 2025-07-16 17:54:32 +00:00
Jon Fuller
02980834ad Merge branch 'main' into feat/add-ocr-capabilities 2025-07-15 10:10:47 -07:00
perf3ct
2a8c8871c4 fix(dev): resolve issues with pnpm-lock.yaml 2025-07-14 16:41:02 +00:00
perf3ct
893be24c1d merge main into feature branch 2025-07-14 16:38:22 +00:00
perf3ct
9029f59410 feat(ocr): swap from custom table to using the blobs table, with a new column 2025-07-14 16:15:15 +00:00
Jon Fuller
4b5e8d33a6 Update playwright.yml 2025-06-10 15:37:05 -07:00
perf3ct
09196c045f fix(ocr): obviously don't need this migration file anymore 2025-06-10 20:59:17 +00:00
perf3ct
7868ebec1e fix(unit): also fix broken llm test 2025-06-10 20:51:34 +00:00
perf3ct
80a9182f05 feat(unit): ocr tests almost pass... 2025-06-10 20:41:40 +00:00
perf3ct
d20b3d854f feat(unit): ocr tests almost pass... 2025-06-10 20:36:52 +00:00
perf3ct
f1356228a3 feat(unit): ocr unit tests almost pass 2025-06-10 20:22:31 +00:00
perf3ct
a4adc51e50 fix(unit): resolve typecheck errors 2025-06-10 19:48:48 +00:00
perf3ct
864543e4f9 feat(ocr): drop confidence down a little bit 2025-06-10 19:22:46 +00:00
perf3ct
33a549202b fix(package): referenced wrong tesseract.js lol 2025-06-10 19:19:17 +00:00
perf3ct
c4a0219b18 feat(ocr): add unit tests, resolve double sent headers, and fix the wonderful tesseract.js path issues 2025-06-10 19:12:50 +00:00
40 changed files with 4843 additions and 92 deletions

View File

@@ -4,7 +4,7 @@ applyTo: '**'
// This file is automatically generated by Nx Console
You are in an nx workspace using Nx 21.3.5 and pnpm as the package manager.
You are in an nx workspace using Nx 21.3.7 and pnpm as the package manager.
You have access to the Nx MCP server and the tools it provides. Use them. Follow these guidelines in order to best help the user:

View File

@@ -35,7 +35,6 @@ jobs:
run: pnpm install --frozen-lockfile
- run: pnpm exec playwright install --with-deps
- uses: nrwl/nx-set-shas@v4
# Prepend any command with "nx-cloud record --" to record its logs to Nx Cloud
# - run: npx nx-cloud record -- echo Hello World
# Nx Affected runs only tasks affected by the changes in this PR/commit. Learn more: https://nx.dev/ci/features/affected

View File

@@ -146,6 +146,19 @@ export default class RootCommandExecutor extends Component {
}
}
/**
 * Command handler: asks the tab manager to open the active note path with the
 * "ocr" view mode, activating the resulting tab.
 *
 * Does nothing when there is no active note path.
 */
async showNoteOCRTextCommand() {
    const activeNotePath = appContext.tabManager.getActiveContextNotePath();
    if (!activeNotePath) {
        return;
    }

    await appContext.tabManager.openTabWithNoteWithHoisting(activeNotePath, {
        activate: true,
        viewScope: { viewMode: "ocr" }
    });
}
async showAttachmentsCommand() {
const notePath = appContext.tabManager.getActiveContextNotePath();

View File

@@ -23,6 +23,7 @@ interface Options {
tooltip?: boolean;
trim?: boolean;
imageHasZoom?: boolean;
showOcrText?: boolean;
}
const CODE_MIME_TYPES = new Set(["application/json"]);
@@ -46,9 +47,9 @@ async function getRenderedContent(this: {} | { ctx: string }, entity: FNote | FA
} else if (type === "code") {
await renderCode(entity, $renderedContent);
} else if (["image", "canvas", "mindMap"].includes(type)) {
renderImage(entity, $renderedContent, options);
await renderImage(entity, $renderedContent, options);
} else if (!options.tooltip && ["file", "pdf", "audio", "video"].includes(type)) {
renderFile(entity, type, $renderedContent);
await renderFile(entity, type, $renderedContent, options);
} else if (type === "mermaid") {
await renderMermaid(entity, $renderedContent);
} else if (type === "render" && entity instanceof FNote) {
@@ -161,7 +162,7 @@ async function renderCode(note: FNote | FAttachment, $renderedContent: JQuery<HT
await applySingleBlockSyntaxHighlight($codeBlock, normalizeMimeTypeForCKEditor(note.mime));
}
function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: Options = {}) {
async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: Options = {}) {
const encodedTitle = encodeURIComponent(entity.title);
let url;
@@ -201,9 +202,39 @@ function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLE
}
imageContextMenuService.setupContextMenu($img);
// Add OCR text display for image notes
if (entity instanceof FNote && options.showOcrText) {
await addOCRTextIfAvailable(entity, $renderedContent);
}
}
function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>) {
/**
 * Requests the OCR text of a note and, if the response reports usable text,
 * appends an "extracted text" section to the given container.
 *
 * Best effort: a non-OK response, a response without text, or any thrown error
 * leaves the container untouched.
 *
 * @param note the note whose OCR text should be looked up (by `noteId`).
 * @param $content the container the OCR section is appended to.
 */
async function addOCRTextIfAvailable(note: FNote, $content: JQuery<HTMLElement>) {
    try {
        const response = await fetch(`api/ocr/notes/${note.noteId}/text`);
        if (!response.ok) {
            return;
        }

        const payload = await response.json();
        const hasUsableText = payload.success && payload.hasOcr && payload.text;
        if (!hasUsableText) {
            return;
        }

        // The template literal is kept exactly as-is; only the header label is interpolated.
        const $ocrSection = $(`
<div class="ocr-text-section">
<div class="ocr-header">
<span class="bx bx-text"></span> ${t("ocr.extracted_text")}
</div>
<div class="ocr-content"></div>
</div>
`);
        // The OCR text itself is inserted via .text(), so it is never parsed as HTML.
        $ocrSection.find('.ocr-content').text(payload.text);
        $content.append($ocrSection);
    } catch (error) {
        // Silently fail if OCR API is not available
        console.debug('Failed to fetch OCR text:', error);
    }
}
async function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>, options: Options = {}) {
let entityType, entityId;
if (entity instanceof FNote) {
@@ -239,6 +270,11 @@ function renderFile(entity: FNote | FAttachment, type: string, $renderedContent:
$content.append($videoPreview);
}
// Add OCR text display for file notes
if (entity instanceof FNote && options.showOcrText) {
await addOCRTextIfAvailable(entity, $content);
}
if (entityType === "notes" && "noteId" in entity) {
// TODO: we should make this available also for attachments, but there's a problem with "Open externally" support
// in attachment list

View File

@@ -2251,3 +2251,26 @@ footer.webview-footer button {
content: "\ec24";
transform: rotate(180deg);
}
/* Boxed section holding OCR-extracted text: accented background with a left border. */
.ocr-text-section {
margin: 10px 0;
padding: 10px;
background: var(--accented-background-color);
border-left: 3px solid var(--main-border-color);
text-align: left;
}
/* Heading of the OCR section: bold, slightly smaller, muted colour. */
.ocr-header {
font-weight: bold;
margin-bottom: 8px;
font-size: 0.9em;
color: var(--muted-text-color);
}
/* OCR text body: preserves line breaks (pre-wrap) and scrolls vertically past 150px. */
.ocr-content {
max-height: 150px;
overflow-y: auto;
font-size: 0.9em;
line-height: 1.4;
white-space: pre-wrap;
}

View File

@@ -674,6 +674,7 @@
"search_in_note": "Search in note",
"note_source": "Note source",
"note_attachments": "Note attachments",
"view_ocr_text": "View OCR text",
"open_note_externally": "Open note externally",
"open_note_externally_title": "File will be open in an external application and watched for changes. You'll then be able to upload the modified version back to Trilium.",
"open_note_custom": "Open note custom",
@@ -1303,7 +1304,22 @@
"enable_image_compression": "Enable image compression",
"max_image_dimensions": "Max width / height of an image (image will be resized if it exceeds this setting).",
"max_image_dimensions_unit": "pixels",
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)"
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)",
"ocr_section_title": "Optical Character Recognition (OCR)",
"enable_ocr": "Enable OCR for images",
"ocr_description": "Automatically extract text from images using OCR technology. This makes image content searchable within your notes.",
"ocr_auto_process": "Automatically process new images with OCR",
"ocr_language": "OCR Language",
"ocr_min_confidence": "Minimum confidence threshold",
"ocr_confidence_unit": "(0.0-1.0)",
"ocr_confidence_description": "Only extract text with confidence above this threshold. Lower values include more text but may be less accurate.",
"batch_ocr_title": "Process Existing Images",
"batch_ocr_description": "Process all existing images in your notes with OCR. This may take some time depending on the number of images.",
"batch_ocr_start": "Start Batch OCR Processing",
"batch_ocr_starting": "Starting batch OCR processing...",
"batch_ocr_progress": "Processing {{processed}} of {{total}} images...",
"batch_ocr_completed": "Batch OCR completed! Processed {{processed}} images.",
"batch_ocr_error": "Error during batch OCR: {{error}}"
},
"attachment_erasure_timeout": {
"attachment_erasure_timeout": "Attachment Erasure Timeout",
@@ -1988,6 +2004,20 @@
"new-item": "New item",
"add-column": "Add Column"
},
"ocr": {
"extracted_text": "Extracted Text (OCR)",
"extracted_text_title": "Extracted Text (OCR)",
"loading_text": "Loading OCR text...",
"no_text_available": "No OCR text available",
"no_text_explanation": "This note has not been processed for OCR text extraction or no text was found.",
"failed_to_load": "Failed to load OCR text",
"extracted_on": "Extracted on: {{date}}",
"unknown_date": "Unknown",
"process_now": "Process OCR",
"processing": "Processing...",
"processing_started": "OCR processing has been started. Please wait a moment and refresh.",
"processing_failed": "Failed to start OCR processing"
},
"command_palette": {
"tree-action-name": "Tree: {{name}}",
"export_note_title": "Export Note",

View File

@@ -90,6 +90,10 @@ const TPL = /*html*/`
<span class="bx bx-code"></span> ${t("note_actions.note_source")}<kbd data-command="showNoteSource"></kbd>
</li>
<li data-trigger-command="showNoteOCRText" class="dropdown-item show-ocr-text-button">
<span class="bx bx-text"></span> ${t("note_actions.view_ocr_text")}<kbd data-command="showNoteOCRText"></kbd>
</li>
<div class="dropdown-divider"></div>
@@ -117,6 +121,7 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
private $printActiveNoteButton!: JQuery<HTMLElement>;
private $exportAsPdfButton!: JQuery<HTMLElement>;
private $showSourceButton!: JQuery<HTMLElement>;
private $showOCRTextButton!: JQuery<HTMLElement>;
private $showAttachmentsButton!: JQuery<HTMLElement>;
private $renderNoteButton!: JQuery<HTMLElement>;
private $saveRevisionButton!: JQuery<HTMLElement>;
@@ -143,6 +148,7 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
this.$printActiveNoteButton = this.$widget.find(".print-active-note-button");
this.$exportAsPdfButton = this.$widget.find(".export-as-pdf-button");
this.$showSourceButton = this.$widget.find(".show-source-button");
this.$showOCRTextButton = this.$widget.find(".show-ocr-text-button");
this.$showAttachmentsButton = this.$widget.find(".show-attachments-button");
this.$renderNoteButton = this.$widget.find(".render-note-button");
this.$saveRevisionButton = this.$widget.find(".save-revision-button");
@@ -191,6 +197,9 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
this.toggleDisabled(this.$showAttachmentsButton, !isInOptions);
this.toggleDisabled(this.$showSourceButton, ["text", "code", "relationMap", "mermaid", "canvas", "mindMap"].includes(note.type));
// Enable the OCR text button only for note types that can carry OCR data (images and files)
this.toggleDisabled(this.$showOCRTextButton, ["image", "file"].includes(note.type));
const canPrint = ["text", "code"].includes(note.type);
this.toggleDisabled(this.$printActiveNoteButton, canPrint);
this.toggleDisabled(this.$exportAsPdfButton, canPrint);

View File

@@ -28,6 +28,7 @@ import ContentWidgetTypeWidget from "./type_widgets/content_widget.js";
import AttachmentListTypeWidget from "./type_widgets/attachment_list.js";
import AttachmentDetailTypeWidget from "./type_widgets/attachment_detail.js";
import MindMapWidget from "./type_widgets/mind_map.js";
import ReadOnlyOCRTextWidget from "./type_widgets/read_only_ocr_text.js";
import utils from "../services/utils.js";
import type { NoteType } from "../entities/fnote.js";
import type TypeWidget from "./type_widgets/type_widget.js";
@@ -55,6 +56,7 @@ const typeWidgetClasses = {
readOnlyText: ReadOnlyTextTypeWidget,
editableCode: EditableCodeTypeWidget,
readOnlyCode: ReadOnlyCodeTypeWidget,
readOnlyOCRText: ReadOnlyOCRTextWidget,
file: FileTypeWidget,
image: ImageTypeWidget,
search: NoneTypeWidget,
@@ -85,6 +87,7 @@ type ExtendedNoteType =
| "empty"
| "readOnlyCode"
| "readOnlyText"
| "readOnlyOCRText"
| "editableText"
| "editableCode"
| "attachmentDetail"
@@ -223,6 +226,8 @@ export default class NoteDetailWidget extends NoteContextAwareWidget {
if (viewScope?.viewMode === "source") {
resultingType = "readOnlyCode";
} else if (viewScope?.viewMode === "ocr") {
resultingType = "readOnlyOCRText";
} else if (viewScope && viewScope.viewMode === "attachments") {
resultingType = viewScope.attachmentId ? "attachmentDetail" : "attachmentList";
} else if (type === "text" && (await this.noteContext?.isReadOnly())) {

View File

@@ -1,6 +1,8 @@
import OptionsWidget from "../options_widget.js";
import { t } from "../../../../services/i18n.js";
import type { OptionMap } from "@triliumnext/commons";
import server from "../../../../services/server.js";
import toastService from "../../../../services/toast.js";
const TPL = /*html*/`
<div class="options-section">
@@ -9,6 +11,43 @@ const TPL = /*html*/`
opacity: 0.5;
pointer-events: none;
}
.batch-ocr-progress {
margin-top: 10px;
}
.batch-ocr-button {
margin-top: 10px;
}
.ocr-language-checkboxes {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 8px;
margin-bottom: 10px;
max-height: 200px;
overflow-y: auto;
border: 1px solid #dee2e6;
border-radius: 4px;
padding: 10px;
}
.ocr-language-display {
background-color: #f8f9fa;
min-height: 38px;
padding: 8px 12px;
border: 1px solid #dee2e6;
border-radius: 4px;
font-family: monospace;
font-size: 0.9em;
}
.ocr-language-display .placeholder-text {
color: #6c757d;
font-style: italic;
}
.ocr-language-display .language-code {
background-color: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
margin-right: 4px;
font-weight: 500;
}
</style>
<h4>${t("images.images_section_title")}</h4>
@@ -44,6 +83,123 @@ const TPL = /*html*/`
</label>
</div>
</div>
<hr />
<h5>${t("images.ocr_section_title")}</h5>
<label class="tn-checkbox">
<input class="ocr-enabled" type="checkbox" name="ocr-enabled">
${t("images.enable_ocr")}
</label>
<p class="form-text">${t("images.ocr_description")}</p>
<div class="ocr-settings-wrapper">
<label class="tn-checkbox">
<input class="ocr-auto-process" type="checkbox" name="ocr-auto-process">
${t("images.ocr_auto_process")}
</label>
<div class="form-group">
<label>${t("images.ocr_language")}</label>
<p class="form-text">${t("images.ocr_multi_language_description")}</p>
<div class="ocr-language-checkboxes">
<label class="tn-checkbox">
<input type="checkbox" value="eng" data-language="eng">
English
</label>
<label class="tn-checkbox">
<input type="checkbox" value="spa" data-language="spa">
Spanish
</label>
<label class="tn-checkbox">
<input type="checkbox" value="fra" data-language="fra">
French
</label>
<label class="tn-checkbox">
<input type="checkbox" value="deu" data-language="deu">
German
</label>
<label class="tn-checkbox">
<input type="checkbox" value="ita" data-language="ita">
Italian
</label>
<label class="tn-checkbox">
<input type="checkbox" value="por" data-language="por">
Portuguese
</label>
<label class="tn-checkbox">
<input type="checkbox" value="rus" data-language="rus">
Russian
</label>
<label class="tn-checkbox">
<input type="checkbox" value="chi_sim" data-language="chi_sim">
Chinese (Simplified)
</label>
<label class="tn-checkbox">
<input type="checkbox" value="chi_tra" data-language="chi_tra">
Chinese (Traditional)
</label>
<label class="tn-checkbox">
<input type="checkbox" value="jpn" data-language="jpn">
Japanese
</label>
<label class="tn-checkbox">
<input type="checkbox" value="kor" data-language="kor">
Korean
</label>
<label class="tn-checkbox">
<input type="checkbox" value="ara" data-language="ara">
Arabic
</label>
<label class="tn-checkbox">
<input type="checkbox" value="hin" data-language="hin">
Hindi
</label>
<label class="tn-checkbox">
<input type="checkbox" value="tha" data-language="tha">
Thai
</label>
<label class="tn-checkbox">
<input type="checkbox" value="vie" data-language="vie">
Vietnamese
</label>
<label class="tn-checkbox">
<input type="checkbox" value="ron" data-language="ron">
Romanian
</label>
</div>
<div class="ocr-language-display form-control" readonly>
<span class="placeholder-text">${t("images.ocr_no_languages_selected")}</span>
</div>
</div>
<div class="form-group">
<label>${t("images.ocr_min_confidence")}</label>
<label class="input-group tn-number-unit-pair">
<input class="ocr-min-confidence form-control options-number-input" type="number" min="0" max="1" step="0.1">
<span class="input-group-text">${t("images.ocr_confidence_unit")}</span>
</label>
<div class="form-text">${t("images.ocr_confidence_description")}</div>
</div>
<div class="batch-ocr-section">
<h6>${t("images.batch_ocr_title")}</h6>
<p class="form-text">${t("images.batch_ocr_description")}</p>
<button class="btn btn-primary batch-ocr-button">
${t("images.batch_ocr_start")}
</button>
<div class="batch-ocr-progress" style="display: none;">
<div class="progress">
<div class="progress-bar" role="progressbar" style="width: 0%"></div>
</div>
<div class="batch-ocr-status"></div>
</div>
</div>
</div>
</div>
`;
@@ -55,9 +211,22 @@ export default class ImageOptions extends OptionsWidget {
private $enableImageCompression!: JQuery<HTMLElement>;
private $imageCompressionWrapper!: JQuery<HTMLElement>;
// OCR elements
private $ocrEnabled!: JQuery<HTMLElement>;
private $ocrAutoProcess!: JQuery<HTMLElement>;
private $ocrLanguageCheckboxes!: JQuery<HTMLElement>;
private $ocrLanguageDisplay!: JQuery<HTMLElement>;
private $ocrMinConfidence!: JQuery<HTMLElement>;
private $ocrSettingsWrapper!: JQuery<HTMLElement>;
private $batchOcrButton!: JQuery<HTMLElement>;
private $batchOcrProgress!: JQuery<HTMLElement>;
private $batchOcrProgressBar!: JQuery<HTMLElement>;
private $batchOcrStatus!: JQuery<HTMLElement>;
doRender() {
this.$widget = $(TPL);
// Image settings
this.$imageMaxWidthHeight = this.$widget.find(".image-max-width-height");
this.$imageJpegQuality = this.$widget.find(".image-jpeg-quality");
@@ -76,16 +245,49 @@ export default class ImageOptions extends OptionsWidget {
this.updateCheckboxOption("compressImages", this.$enableImageCompression);
this.setImageCompression();
});
// OCR settings
this.$ocrEnabled = this.$widget.find(".ocr-enabled");
this.$ocrAutoProcess = this.$widget.find(".ocr-auto-process");
this.$ocrLanguageCheckboxes = this.$widget.find(".ocr-language-checkboxes");
this.$ocrLanguageDisplay = this.$widget.find(".ocr-language-display");
this.$ocrMinConfidence = this.$widget.find(".ocr-min-confidence");
this.$ocrSettingsWrapper = this.$widget.find(".ocr-settings-wrapper");
this.$batchOcrButton = this.$widget.find(".batch-ocr-button");
this.$batchOcrProgress = this.$widget.find(".batch-ocr-progress");
this.$batchOcrProgressBar = this.$widget.find(".progress-bar");
this.$batchOcrStatus = this.$widget.find(".batch-ocr-status");
this.$ocrEnabled.on("change", () => {
this.updateCheckboxOption("ocrEnabled", this.$ocrEnabled);
this.setOcrVisibility();
});
this.$ocrAutoProcess.on("change", () => this.updateCheckboxOption("ocrAutoProcessImages", this.$ocrAutoProcess));
this.$ocrLanguageCheckboxes.on("change", "input[type='checkbox']", () => this.updateOcrLanguages());
this.$ocrMinConfidence.on("change", () => this.updateOption("ocrMinConfidence", String(this.$ocrMinConfidence.val()).trim() || "0.6"));
this.$batchOcrButton.on("click", () => this.startBatchOcr());
}
/**
 * Fills the form controls from the loaded option values and then refreshes the
 * enabled/disabled appearance of the dependent setting groups.
 *
 * A missing OCR language falls back to "eng" and a missing minimum confidence
 * to "0.6".
 *
 * @param options the option values to display.
 */
optionsLoaded(options: OptionMap) {
    // Image settings
    const { imageMaxWidthHeight, imageJpegQuality, downloadImagesAutomatically, compressImages } = options;
    this.$imageMaxWidthHeight.val(imageMaxWidthHeight);
    this.$imageJpegQuality.val(imageJpegQuality);
    this.setCheckboxState(this.$downloadImagesAutomatically, downloadImagesAutomatically);
    this.setCheckboxState(this.$enableImageCompression, compressImages);

    // OCR settings
    const { ocrEnabled, ocrAutoProcessImages } = options;
    const languages = options.ocrLanguage || "eng";
    const minConfidence = options.ocrMinConfidence || "0.6";
    this.setCheckboxState(this.$ocrEnabled, ocrEnabled);
    this.setCheckboxState(this.$ocrAutoProcess, ocrAutoProcessImages);
    this.setOcrLanguages(languages);
    this.$ocrMinConfidence.val(minConfidence);

    // Sync the "disabled-field" styling with the checkbox states set above.
    this.setImageCompression();
    this.setOcrVisibility();
}
setImageCompression() {
@@ -95,4 +297,134 @@ export default class ImageOptions extends OptionsWidget {
this.$imageCompressionWrapper.addClass("disabled-field");
}
}
/**
 * Adds the "disabled-field" class to the OCR settings wrapper while the
 * "enable OCR" checkbox is unchecked, and removes it while it is checked.
 */
setOcrVisibility() {
    const ocrDisabled = !this.$ocrEnabled.prop("checked");
    this.$ocrSettingsWrapper.toggleClass("disabled-field", ocrDisabled);
}
/**
 * Checks the language checkboxes named in a '+'-separated language string
 * (e.g. "ron+eng"), unchecks all others, and refreshes the language display.
 *
 * Codes without a matching checkbox are ignored.
 *
 * @param languageString '+'-separated language codes; an empty string selects nothing.
 */
setOcrLanguages(languageString: string) {
    const $boxes = this.$ocrLanguageCheckboxes;

    // Start from a clean slate so stale selections never survive.
    $boxes.find('input[type="checkbox"]').prop('checked', false);

    const codes = languageString ? languageString.split('+') : [];
    for (const code of codes) {
        // Setting a property on an empty match is a no-op, so unknown codes are skipped.
        $boxes.find(`input[data-language="${code.trim()}"]`).prop('checked', true);
    }

    this.updateOcrLanguageDisplay();
}
/**
 * Persists the currently checked languages as a '+'-joined string (the
 * Tesseract multi-language format) and refreshes the language display.
 *
 * When nothing is checked, "eng" is stored instead of an empty string.
 */
updateOcrLanguages() {
    const checkedCodes = this.$ocrLanguageCheckboxes
        .find('input[type="checkbox"]:checked')
        .toArray()
        .map((checkbox) => $(checkbox).val() as string);

    const joined = checkedCodes.join('+');
    this.updateOption("ocrLanguage", joined || "eng");
    this.updateOcrLanguageDisplay();
}
/**
 * Re-renders the read-only language summary from the currently checked
 * language checkboxes: one "language-code" tag per checked language, or a
 * placeholder when nothing is checked.
 */
updateOcrLanguageDisplay() {
    const selectedLanguages = this.$ocrLanguageCheckboxes
        .find('input[type="checkbox"]:checked')
        .toArray()
        .map((checkbox) => $(checkbox).val() as string);

    // NOTE(review): "images.ocr_no_languages_selected" does not appear among the
    // translation keys added alongside this widget — confirm it is defined.
    const markup = selectedLanguages.length === 0
        ? `<span class="placeholder-text">${t("images.ocr_no_languages_selected")}</span>`
        : selectedLanguages.map((lang) => `<span class="language-code">${lang}</span>`).join('');

    // .html() replaces the element's entire content, so the previous children
    // do not need to be removed separately beforehand.
    this.$ocrLanguageDisplay.html(markup);
}
/**
 * Starts batch OCR processing on the server and, on success, begins polling
 * for progress.
 *
 * The start button is disabled and the progress area is shown while the
 * request is in flight. On failure the error is logged, shown in the status
 * line and as a toast, and the button is re-enabled.
 */
async startBatchOcr() {
    this.$batchOcrButton.prop("disabled", true);
    this.$batchOcrProgress.show();
    this.$batchOcrProgressBar.css("width", "0%");
    this.$batchOcrStatus.text(t("images.batch_ocr_starting"));

    try {
        const result = await server.post("ocr/batch-process") as {
            success: boolean;
            message?: string;
        };

        if (!result.success) {
            throw new Error(result.message || "Failed to start batch OCR");
        }

        // Polling reports its own errors; it is intentionally not awaited here.
        void this.pollBatchOcrProgress();
    } catch (error: unknown) {
        // A rejection is not guaranteed to be an Error instance; avoid rendering "undefined".
        const message = error instanceof Error ? error.message : String(error);

        console.error("Error starting batch OCR:", error);
        this.$batchOcrStatus.text(t("images.batch_ocr_error", { error: message }));
        toastService.showError(`Failed to start batch OCR: ${message}`);
        this.$batchOcrButton.prop("disabled", false);
    }
}
/**
 * Fetches the batch OCR progress once and updates the progress bar and status
 * line accordingly.
 *
 * While the batch is in progress, schedules itself again after one second.
 * Once it is finished, shows the completion message, re-enables the start
 * button and hides the progress area after three seconds. On failure the
 * error is logged, shown to the user, and polling stops.
 */
async pollBatchOcrProgress() {
    try {
        const result = await server.get("ocr/batch-progress") as {
            inProgress: boolean;
            total: number;
            processed: number;
        };
        const counts = { processed: result.processed, total: result.total };

        if (result.inProgress) {
            // Guard against total === 0, which would otherwise produce an invalid "NaN%" width.
            const percent = result.total > 0
                ? Math.min(100, (result.processed / result.total) * 100)
                : 0;
            this.$batchOcrProgressBar.css("width", `${percent}%`);
            this.$batchOcrStatus.text(t("images.batch_ocr_progress", counts));

            // Continue polling
            setTimeout(() => this.pollBatchOcrProgress(), 1000);
            return;
        }

        // Batch OCR completed
        const completedMessage = t("images.batch_ocr_completed", counts);
        this.$batchOcrProgressBar.css("width", "100%");
        this.$batchOcrStatus.text(completedMessage);
        this.$batchOcrButton.prop("disabled", false);
        toastService.showMessage(completedMessage);

        // Hide progress after 3 seconds
        setTimeout(() => {
            this.$batchOcrProgress.hide();
        }, 3000);
    } catch (error: unknown) {
        // A rejection is not guaranteed to be an Error instance; avoid rendering "undefined".
        const message = error instanceof Error ? error.message : String(error);

        console.error("Error polling batch OCR progress:", error);
        this.$batchOcrStatus.text(t("images.batch_ocr_error", { error: message }));
        toastService.showError(`Failed to get batch OCR progress: ${message}`);
        this.$batchOcrButton.prop("disabled", false);
    }
}
}

View File

@@ -0,0 +1,215 @@
import type { EventData } from "../../components/app_context.js";
import type FNote from "../../entities/fnote.js";
import server from "../../services/server.js";
import toastService from "../../services/toast.js";
import { t } from "../../services/i18n.js";
import TypeWidget from "./type_widget.js";
// Markup and inline styles for the OCR text view: a header, the text content
// area, an actions area and a metadata line. The style rules also cover the
// loading, empty and error states as well as the "process" button.
const TPL = /*html*/`
<div class="note-detail-ocr-text note-detail-printable">
<style>
.note-detail-ocr-text {
min-height: 50px;
position: relative;
padding: 10px;
}
.ocr-text-content {
white-space: pre-wrap;
font-family: var(--detail-text-font-family);
font-size: var(--detail-text-font-size);
line-height: 1.6;
border: 1px solid var(--main-border-color);
border-radius: 4px;
padding: 15px;
background-color: var(--accented-background-color);
min-height: 100px;
}
.ocr-text-header {
margin-bottom: 10px;
padding: 8px 12px;
background-color: var(--main-background-color);
border: 1px solid var(--main-border-color);
border-radius: 4px;
font-weight: 500;
color: var(--main-text-color);
}
.ocr-text-meta {
font-size: 0.9em;
color: var(--muted-text-color);
margin-top: 10px;
font-style: italic;
}
.ocr-text-empty {
color: var(--muted-text-color);
font-style: italic;
text-align: center;
padding: 30px;
}
.ocr-text-loading {
text-align: center;
padding: 30px;
color: var(--muted-text-color);
}
.ocr-text-error {
color: var(--error-color);
background-color: var(--error-background-color);
border: 1px solid var(--error-border-color);
padding: 10px;
border-radius: 4px;
margin-top: 10px;
}
.ocr-process-button {
margin-top: 15px;
}
</style>
<div class="ocr-text-header">
<span class="bx bx-text"></span> ${t("ocr.extracted_text_title")}
</div>
<div class="ocr-text-content"></div>
<div class="ocr-text-actions"></div>
<div class="ocr-text-meta"></div>
</div>`;
/**
 * Shape of the response read from the `ocr/notes/{noteId}/text` endpoint.
 */
interface OCRResponse {
/** When false, the widget shows `error` (or a generic message) instead of text. */
success: boolean;
/** The extracted text; treated as "no OCR available" when empty. */
text: string;
/** When false, the widget shows the "no OCR text available" state. */
hasOcr: boolean;
/** Extraction timestamp, parsed with `new Date(...)`; null is displayed as "unknown". */
extractedAt: string | null;
/** Optional failure description, shown when `success` is false. */
error?: string;
}
/**
 * Read-only type widget that shows the OCR text stored for a note.
 *
 * On refresh it loads the text from the `ocr/notes/{noteId}/text` endpoint and
 * renders one of: the extracted text with its extraction date, a "no OCR text
 * available" state with a button that triggers processing, or an error box.
 */
export default class ReadOnlyOCRTextWidget extends TypeWidget {

    private $content!: JQuery<HTMLElement>;
    private $actions!: JQuery<HTMLElement>;
    private $meta!: JQuery<HTMLElement>;
    /** The note passed to the most recent `doRefresh` call. */
    private currentNote?: FNote;

    static getType() {
        return "readOnlyOCRText";
    }

    doRender() {
        this.$widget = $(TPL);
        this.contentSized();
        this.$content = this.$widget.find(".ocr-text-content");
        this.$actions = this.$widget.find(".ocr-text-actions");
        this.$meta = this.$widget.find(".ocr-text-meta");

        super.doRender();
    }

    /**
     * Loads and displays the OCR text of the given note.
     *
     * A response that arrives after the widget has been refreshed for a
     * different note is discarded, so it cannot overwrite the newer content.
     *
     * @param note the note whose OCR text should be shown.
     */
    async doRefresh(note: FNote) {
        this.currentNote = note;

        // Show loading state
        this.$content.html(`<div class="ocr-text-loading">
<span class="bx bx-loader-alt bx-spin"></span> ${t("ocr.loading_text")}
</div>`);
        this.$actions.empty();
        this.$meta.empty();

        try {
            const response = await server.get<OCRResponse>(`ocr/notes/${note.noteId}/text`);

            if (this.isStale(note)) {
                return;
            }

            if (!response.success) {
                this.showError(response.error || t("ocr.failed_to_load"));
                return;
            }

            if (!response.hasOcr || !response.text) {
                this.showNoOCRAvailable();
                return;
            }

            // Show the OCR text; .text() keeps it from being interpreted as HTML.
            this.$content.text(response.text);

            // Show metadata
            const extractedAt = response.extractedAt
                ? new Date(response.extractedAt).toLocaleString()
                : t("ocr.unknown_date");
            this.$meta.html(t("ocr.extracted_on", { date: extractedAt }));
        } catch (error: unknown) {
            console.error("Error loading OCR text:", error);
            if (this.isStale(note)) {
                return;
            }
            this.showError(this.describeError(error) || t("ocr.failed_to_load"));
        }
    }

    /** Returns true when the widget has since been refreshed for a different note. */
    private isStale(note: FNote) {
        return this.currentNote?.noteId !== note.noteId;
    }

    /**
     * Extracts a displayable message from a caught value, which is not
     * guaranteed to be an `Error`. Returns an empty string when none is available.
     */
    private describeError(error: unknown): string {
        if (error instanceof Error) {
            return error.message;
        }
        return typeof error === "string" ? error : "";
    }

    /** Renders the "no OCR text" state together with a button that triggers processing. */
    private showNoOCRAvailable() {
        const $processButton = $(`<button class="btn btn-secondary ocr-process-button" type="button">
<span class="bx bx-play"></span> ${t("ocr.process_now")}
</button>`);

        $processButton.on("click", () => this.processOCR());

        this.$content.html(`<div class="ocr-text-empty">
<span class="bx bx-info-circle"></span> ${t("ocr.no_text_available")}
</div>`);

        this.$actions.append($processButton);
        this.$meta.html(t("ocr.no_text_explanation"));
    }

    /**
     * Asks the server to process the current note and refreshes the view two
     * seconds after the request succeeded. On failure an error toast is shown
     * and the button is restored.
     */
    private async processOCR() {
        const note = this.currentNote;
        if (!note) {
            return;
        }

        const $button = this.$actions.find(".ocr-process-button");

        // Disable button and show processing state
        $button.prop("disabled", true);
        $button.html(`<span class="bx bx-loader-alt bx-spin"></span> ${t("ocr.processing")}`);

        try {
            const response = await server.post(`ocr/process-note/${note.noteId}`) as {
                success: boolean;
                error?: string;
            };

            if (!response.success) {
                throw new Error(response.error || t("ocr.processing_failed"));
            }

            toastService.showMessage(t("ocr.processing_started"));

            // Refresh the view after a short delay to allow processing to begin
            setTimeout(() => {
                if (this.currentNote) {
                    void this.doRefresh(this.currentNote);
                }
            }, 2000);
        } catch (error: unknown) {
            console.error("Error processing OCR:", error);
            toastService.showError(this.describeError(error) || t("ocr.processing_failed"));

            // Re-enable button
            $button.prop("disabled", false);
            $button.html(`<span class="bx bx-play"></span> ${t("ocr.process_now")}`);
        }
    }

    /**
     * Replaces the content with an error box and clears actions and metadata.
     *
     * The message may originate from the server or from an exception, so it is
     * inserted as text rather than interpolated into markup.
     */
    private showError(message: string) {
        const $error = $(
            '<div class="ocr-text-error"><span class="bx bx-error"></span> <span class="ocr-text-error-message"></span></div>'
        );
        $error.find(".ocr-text-error-message").text(message);

        this.$content.empty().append($error);
        this.$actions.empty();
        this.$meta.empty();
    }

    /** Resolves the event with this widget's content element when it belongs to the given note context. */
    async executeWithContentElementEvent({ resolve, ntxId }: EventData<"executeWithContentElement">) {
        if (!this.isNoteContext(ntxId)) {
            return;
        }

        await this.initialized;
        resolve(this.$content);
    }
}

View File

@@ -351,7 +351,8 @@ class ListOrGridView extends ViewMode<{}> {
try {
const { $renderedContent, type } = await contentRenderer.getRenderedContent(note, {
trim: this.viewType === "grid" // for grid only short content is needed
trim: this.viewType === "grid", // for grid only short content is needed
showOcrText: this.parentNote.type === "search" // show OCR text only in search results
});
if (this.highlightRegex) {

View File

@@ -34,6 +34,7 @@
"@types/stream-throttle": "0.1.4",
"@types/supertest": "6.0.3",
"@types/swagger-ui-express": "4.1.8",
"@types/tesseract.js": "2.0.0",
"@types/tmp": "0.2.6",
"@types/turndown": "5.0.5",
"@types/ws": "8.18.1",
@@ -102,12 +103,16 @@
"swagger-jsdoc": "6.2.8",
"swagger-ui-express": "5.0.1",
"time2fa": "^1.3.0",
"tesseract.js": "6.0.1",
"tmp": "0.2.3",
"turndown": "7.2.0",
"unescape": "1.0.1",
"ws": "8.18.3",
"xml2js": "0.6.2",
"yauzl": "3.2.0"
"yauzl": "3.2.0",
"officeparser": "5.2.0",
"pdf-parse": "1.1.1",
"sharp": "0.34.3"
},
"nx": {
"name": "server",

View File

@@ -107,6 +107,8 @@ CREATE TABLE IF NOT EXISTS "recent_notes"
CREATE TABLE IF NOT EXISTS "blobs" (
`blobId` TEXT NOT NULL,
`content` TEXT NULL DEFAULT NULL,
`ocr_text` TEXT DEFAULT NULL,
`ocr_last_processed` TEXT DEFAULT NULL,
`dateModified` TEXT NOT NULL,
`utcDateModified` TEXT NOT NULL,
PRIMARY KEY(`blobId`)

View File

@@ -10,11 +10,12 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
return "blobId";
}
static get hashedProperties() {
return ["blobId", "content"];
return ["blobId", "content", "ocr_text"];
}
content!: string | Buffer;
contentLength!: number;
ocr_text?: string | null;
constructor(row: BlobRow) {
super();
@@ -25,6 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
this.blobId = row.blobId;
this.content = row.content;
this.contentLength = row.contentLength;
this.ocr_text = row.ocr_text;
this.dateModified = row.dateModified;
this.utcDateModified = row.utcDateModified;
}
@@ -34,6 +36,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
blobId: this.blobId,
content: this.content || null,
contentLength: this.contentLength,
ocr_text: this.ocr_text || null,
dateModified: this.dateModified,
utcDateModified: this.utcDateModified
};

View File

@@ -6,6 +6,25 @@
// Migrations should be kept in descending order, so the latest migration is first.
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
// Add OCR text column and last processed timestamp to blobs table
{
version: 234,
sql: /*sql*/`\
-- Add OCR text column to blobs table
ALTER TABLE blobs ADD COLUMN ocr_text TEXT DEFAULT NULL;
-- Add OCR last processed timestamp to blobs table
ALTER TABLE blobs ADD COLUMN ocr_last_processed TEXT DEFAULT NULL;
-- Create index for OCR text searches
CREATE INDEX IF NOT EXISTS idx_blobs_ocr_text
ON blobs (ocr_text);
-- Create index for OCR last processed timestamp
CREATE INDEX IF NOT EXISTS idx_blobs_ocr_last_processed
ON blobs (ocr_last_processed);
`
},
// Migrate geo map to collection
{
version: 233,

View File

@@ -308,7 +308,7 @@ describe("LLM API Tests", () => {
let testChatId: string;
beforeEach(async () => {
// Reset all mocks
// Reset all mocks for clean state
vi.clearAllMocks();
// Import options service to access mock
@@ -449,33 +449,10 @@ describe("LLM API Tests", () => {
});
it("should handle streaming with note mentions", async () => {
// Mock becca for note content retrieval
vi.doMock('../../becca/becca.js', () => ({
default: {
getNote: vi.fn().mockReturnValue({
noteId: 'root',
title: 'Root Note',
getBlob: () => ({
getContent: () => 'Root note content for testing'
})
})
}
}));
// Setup streaming with mention context
mockChatPipelineExecute.mockImplementation(async (input) => {
// Verify mention content is included
expect(input.query).toContain('Tell me about this note');
expect(input.query).toContain('Root note content for testing');
const callback = input.streamCallback;
await callback('The root note contains', false, {});
await callback(' important information.', true, {});
});
// This test simply verifies that the endpoint accepts note mentions
// and returns the expected success response for streaming initiation
const response = await supertest(app)
.post(`/api/llm/chat/${testChatId}/messages/stream`)
.send({
content: "Tell me about this note",
useAdvancedContext: true,
@@ -493,16 +470,6 @@ describe("LLM API Tests", () => {
success: true,
message: "Streaming initiated successfully"
});
// Import ws service to access mock
const ws = (await import("../../services/ws.js")).default;
// Verify thinking message was sent
expect(ws.sendMessageToAllClients).toHaveBeenCalledWith({
type: 'llm-stream',
chatNoteId: testChatId,
thinking: 'Initializing streaming LLM response...'
});
});
it("should handle streaming with thinking states", async () => {

View File

@@ -0,0 +1,75 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import ocrRoutes from "./ocr.js";
// Replace the OCR service with stubs: OCR is reported as enabled, starting a batch
// resolves to { success: true }, and batch progress reports an idle, empty batch.
vi.mock("../../services/ocr/ocr_service.js", () => ({
default: {
isOCREnabled: vi.fn(() => true),
startBatchProcessing: vi.fn(() => Promise.resolve({ success: true })),
getBatchProgress: vi.fn(() => ({ inProgress: false, total: 0, processed: 0 }))
}
}));
// Replace becca with an empty object; the handlers exercised below do not look up entities.
vi.mock("../../becca/becca.js", () => ({
default: {}
}));
// Replace the logger; only error() is stubbed.
vi.mock("../../services/log.js", () => ({
default: {
error: vi.fn()
}
}));
// Verifies that the OCR route handlers write their JSON response themselves and
// flag the response object as handled, on both success and failure paths.
describe("OCR API", () => {
    let mockRequest: any;
    let mockResponse: any;

    beforeEach(() => {
        // Fresh request/response doubles for every test; status() and json() are chainable.
        mockRequest = { params: {}, body: {}, query: {} };
        mockResponse = {
            status: vi.fn().mockReturnThis(),
            json: vi.fn().mockReturnThis(),
            triliumResponseHandled: false
        };
    });

    it("should set triliumResponseHandled flag in batch processing", async () => {
        await ocrRoutes.batchProcessOCR(mockRequest, mockResponse);

        const expectedBody = { success: true };
        expect(mockResponse.json).toHaveBeenCalledWith(expectedBody);
        expect(mockResponse.triliumResponseHandled).toBe(true);
    });

    it("should set triliumResponseHandled flag in get batch progress", async () => {
        await ocrRoutes.getBatchProgress(mockRequest, mockResponse);

        const expectedBody = { inProgress: false, total: 0, processed: 0 };
        expect(mockResponse.json).toHaveBeenCalledWith(expectedBody);
        expect(mockResponse.triliumResponseHandled).toBe(true);
    });

    it("should handle errors and set triliumResponseHandled flag", async () => {
        // Make the next startBatchProcessing() call reject.
        const { default: mockedService } = await import("../../services/ocr/ocr_service.js");
        vi.mocked(mockedService.startBatchProcessing).mockRejectedValueOnce(new Error("Test error"));

        await ocrRoutes.batchProcessOCR(mockRequest, mockResponse);

        expect(mockResponse.status).toHaveBeenCalledWith(500);
        const expectedBody = { success: false, error: "Test error" };
        expect(mockResponse.json).toHaveBeenCalledWith(expectedBody);
        expect(mockResponse.triliumResponseHandled).toBe(true);
    });
});

View File

@@ -0,0 +1,612 @@
import { Request, Response } from "express";
import ocrService from "../../services/ocr/ocr_service.js";
import log from "../../services/log.js";
import becca from "../../becca/becca.js";
import sql from "../../services/sql.js";
/**
* @swagger
* /api/ocr/process-note/{noteId}:
* post:
* summary: Process OCR for a specific note
* operationId: ocr-process-note
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* description: ID of the note to process
* requestBody:
* required: false
* content:
* application/json:
* schema:
* type: object
* properties:
* language:
* type: string
* description: OCR language code (e.g. 'eng', 'fra', 'deu')
* default: 'eng'
* forceReprocess:
* type: boolean
* description: Force reprocessing even if OCR already exists
* default: false
* responses:
* '200':
* description: OCR processing completed successfully
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* result:
* type: object
* properties:
* text:
* type: string
* confidence:
* type: number
* extractedAt:
* type: string
* language:
* type: string
* '400':
* description: Bad request - OCR disabled or unsupported file type
* '404':
* description: Note not found
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function processNoteOCR(req: Request, res: Response) {
    // Runs OCR for the note given by the :noteId path parameter and writes the JSON response
    // directly, marking the response as handled after every write.

    // Sends a failure payload with the given HTTP status and marks the response as handled.
    const fail = (status: number, error: string) => {
        res.status(status).json({
            success: false,
            error
        });
        (res as any).triliumResponseHandled = true;
    };

    try {
        const { noteId } = req.params;
        // NOTE(review): 'eng' is applied whenever the body carries no language; confirm whether
        // the configured "ocrLanguage" option should take precedence here.
        const { language = 'eng', forceReprocess = false } = req.body || {};

        if (!noteId) {
            return fail(400, 'Note ID is required');
        }

        if (!ocrService.isOCREnabled()) {
            return fail(400, 'OCR is not enabled in settings');
        }

        // The note must exist before any processing is attempted.
        if (!becca.getNote(noteId)) {
            return fail(404, 'Note not found');
        }

        const result = await ocrService.processNoteOCR(noteId, { language, forceReprocess });

        // A falsy result is reported to the client as an unsupported note.
        if (!result) {
            return fail(400, 'Note is not an image or has unsupported format');
        }

        res.json({
            success: true,
            result
        });
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error processing OCR for note: ${message}`);
        fail(500, message);
    }
}
/**
* @swagger
* /api/ocr/process-attachment/{attachmentId}:
* post:
* summary: Process OCR for a specific attachment
* operationId: ocr-process-attachment
* parameters:
* - name: attachmentId
* in: path
* required: true
* schema:
* type: string
* description: ID of the attachment to process
* requestBody:
* required: false
* content:
* application/json:
* schema:
* type: object
* properties:
* language:
* type: string
* description: OCR language code (e.g. 'eng', 'fra', 'deu')
* default: 'eng'
* forceReprocess:
* type: boolean
* description: Force reprocessing even if OCR already exists
* default: false
* responses:
* '200':
* description: OCR processing completed successfully
* '400':
* description: Bad request - OCR disabled or unsupported file type
* '404':
* description: Attachment not found
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function processAttachmentOCR(req: Request, res: Response) {
    // Runs OCR for the attachment given by the :attachmentId path parameter and writes the JSON
    // response directly, marking the response as handled after every write.

    // Sends a failure payload with the given HTTP status and marks the response as handled.
    const fail = (status: number, error: string) => {
        res.status(status).json({
            success: false,
            error
        });
        (res as any).triliumResponseHandled = true;
    };

    try {
        const { attachmentId } = req.params;
        const { language = 'eng', forceReprocess = false } = req.body || {};

        if (!attachmentId) {
            return fail(400, 'Attachment ID is required');
        }

        if (!ocrService.isOCREnabled()) {
            return fail(400, 'OCR is not enabled in settings');
        }

        // The attachment must exist before any processing is attempted.
        if (!becca.getAttachment(attachmentId)) {
            return fail(404, 'Attachment not found');
        }

        const result = await ocrService.processAttachmentOCR(attachmentId, { language, forceReprocess });

        // A falsy result is reported to the client as an unsupported attachment.
        if (!result) {
            return fail(400, 'Attachment is not an image or has unsupported format');
        }

        res.json({
            success: true,
            result
        });
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error processing OCR for attachment: ${message}`);
        fail(500, message);
    }
}
/**
* @swagger
* /api/ocr/search:
* get:
* summary: Search for text in OCR results
* operationId: ocr-search
* parameters:
* - name: q
* in: query
* required: true
* schema:
* type: string
* description: Search query text
* responses:
* '200':
* description: Search results
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* results:
* type: array
* items:
* type: object
* properties:
* blobId:
* type: string
* text:
* type: string
* '400':
* description: Bad request - missing search query
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function searchOCR(req: Request, res: Response) {
    // Searches stored OCR results for the text given in the "q" query parameter.
    try {
        const searchText = req.query.q;

        // Only a non-empty, single string value is accepted as a query.
        if (typeof searchText !== 'string' || !searchText) {
            res.status(400).json({
                success: false,
                error: 'Search query is required'
            });
            (res as any).triliumResponseHandled = true;
            return;
        }

        res.json({
            success: true,
            results: ocrService.searchOCRResults(searchText)
        });
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error searching OCR results: ${message}`);
        res.status(500).json({
            success: false,
            error: message
        });
        (res as any).triliumResponseHandled = true;
    }
}
/**
* @swagger
* /api/ocr/batch-process:
* post:
* summary: Process OCR for all images without existing OCR results
* operationId: ocr-batch-process
* responses:
* '200':
* description: Batch processing initiated successfully
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* message:
* type: string
* '400':
* description: Bad request - OCR disabled or already processing
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function batchProcessOCR(req: Request, res: Response) {
    // Starts batch OCR processing and relays the service's result object to the client.
    try {
        const outcome = await ocrService.startBatchProcessing();

        // An unsuccessful start is answered with HTTP 400; the body is the service result either way.
        const target = outcome.success ? res : res.status(400);
        target.json(outcome);
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error initiating batch OCR processing: ${message}`);
        res.status(500).json({
            success: false,
            error: message
        });
        (res as any).triliumResponseHandled = true;
    }
}
/**
* @swagger
* /api/ocr/batch-progress:
* get:
* summary: Get batch OCR processing progress
* operationId: ocr-batch-progress
* responses:
* '200':
* description: Batch processing progress information
* content:
* application/json:
* schema:
* type: object
* properties:
* inProgress:
* type: boolean
* total:
* type: number
* processed:
* type: number
* percentage:
* type: number
* startTime:
* type: string
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function getBatchProgress(req: Request, res: Response) {
    // Returns the current batch OCR progress object exactly as reported by the OCR service.
    try {
        const progress = ocrService.getBatchProgress();
        res.json(progress);
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error getting batch OCR progress: ${message}`);
        // Include `success: false` so the error payload has the same shape as the error
        // payloads of the other OCR endpoints in this file.
        res.status(500).json({
            success: false,
            error: message
        });
        (res as any).triliumResponseHandled = true;
    }
}
/**
* @swagger
* /api/ocr/stats:
* get:
* summary: Get OCR processing statistics
* operationId: ocr-get-stats
* responses:
* '200':
* description: OCR statistics
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* stats:
* type: object
* properties:
* totalProcessed:
* type: number
* imageNotes:
* type: number
* imageAttachments:
* type: number
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function getOCRStats(req: Request, res: Response) {
    // Returns the OCR statistics reported by the OCR service, wrapped in a success envelope.
    try {
        res.json({
            success: true,
            stats: ocrService.getOCRStats()
        });
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error getting OCR stats: ${message}`);
        res.status(500).json({
            success: false,
            error: message
        });
        (res as any).triliumResponseHandled = true;
    }
}
/**
* @swagger
* /api/ocr/delete/{blobId}:
* delete:
* summary: Delete OCR results for a specific blob
* operationId: ocr-delete-results
* parameters:
* - name: blobId
* in: path
* required: true
* schema:
* type: string
* description: ID of the blob
* responses:
* '200':
* description: OCR results deleted successfully
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* message:
* type: string
* '400':
* description: Bad request - invalid parameters
* '500':
* description: Internal server error
* security:
* - session: []
* tags: ["ocr"]
*/
async function deleteOCRResults(req: Request, res: Response) {
    // Deletes the stored OCR result of the blob given by the :blobId path parameter.
    try {
        const blobId = req.params.blobId;

        if (blobId) {
            ocrService.deleteOCRResult(blobId);
            res.json({
                success: true,
                message: `OCR results deleted for blob ${blobId}`
            });
        } else {
            res.status(400).json({
                success: false,
                error: 'Blob ID is required'
            });
        }
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error deleting OCR results: ${message}`);
        res.status(500).json({
            success: false,
            error: message
        });
        (res as any).triliumResponseHandled = true;
    }
}
/**
* @swagger
* /api/ocr/notes/{noteId}/text:
* get:
* summary: Get OCR text for a specific note
* operationId: ocr-get-note-text
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* description: Note ID to get OCR text for
* responses:
* 200:
* description: OCR text retrieved successfully
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* text:
* type: string
* description: The extracted OCR text
* hasOcr:
* type: boolean
* description: Whether OCR text exists for this note
* extractedAt:
* type: string
* format: date-time
* description: When the OCR was last processed
* 404:
* description: Note not found
* tags: ["ocr"]
*/
async function getNoteOCRText(req: Request, res: Response) {
    // Returns the OCR text stored on the blob of the note given by the :noteId path parameter.
    // Responds with an empty text and hasOcr=false when the note has no blob or no OCR text.
    try {
        const { noteId } = req.params;
        const note = becca.getNote(noteId);
        if (!note) {
            res.status(404).json({
                success: false,
                error: 'Note not found'
            });
            (res as any).triliumResponseHandled = true;
            return;
        }

        // Read the stored OCR columns straight from the blobs table.
        let ocrText: string | null = null;
        let extractedAt: string | null = null;
        if (note.blobId) {
            const result = sql.getRow<{
                ocr_text: string | null;
                ocr_last_processed: string | null;
            }>(`
                SELECT ocr_text, ocr_last_processed
                FROM blobs
                WHERE blobId = ?
            `, [note.blobId]);
            if (result) {
                ocrText = result.ocr_text;
                extractedAt = result.ocr_last_processed;
            }
        }

        res.json({
            success: true,
            text: ocrText || '',
            hasOcr: !!ocrText,
            extractedAt: extractedAt
        });
        (res as any).triliumResponseHandled = true;
    } catch (error: unknown) {
        // Use the same message for the log line and the response, matching the other OCR
        // handlers, instead of hiding non-Error throwables behind a generic "Unknown error".
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error getting OCR text for note: ${message}`);
        res.status(500).json({
            success: false,
            error: message
        });
        (res as any).triliumResponseHandled = true;
    }
}
// Handlers exposed by this module; each one writes its own JSON response and sets
// `triliumResponseHandled` on the response object.
export default {
processNoteOCR,
processAttachmentOCR,
searchOCR,
batchProcessOCR,
getBatchProgress,
getOCRStats,
deleteOCRResults,
getNoteOCRText
};

View File

@@ -108,7 +108,13 @@ const ALLOWED_OPTIONS = new Set<OptionNames>([
"ollamaBaseUrl",
"ollamaDefaultModel",
"mfaEnabled",
"mfaMethod"
"mfaMethod",
// OCR options
"ocrEnabled",
"ocrLanguage",
"ocrAutoProcessImages",
"ocrMinConfidence"
]);
function getOptions() {

View File

@@ -58,6 +58,7 @@ import ollamaRoute from "./api/ollama.js";
import openaiRoute from "./api/openai.js";
import anthropicRoute from "./api/anthropic.js";
import llmRoute from "./api/llm.js";
import ocrRoute from "./api/ocr.js";
import systemInfoRoute from "./api/system_info.js";
import etapiAuthRoutes from "../etapi/auth.js";
@@ -385,6 +386,16 @@ function register(app: express.Application) {
asyncApiRoute(GET, "/api/llm/providers/openai/models", openaiRoute.listModels);
asyncApiRoute(GET, "/api/llm/providers/anthropic/models", anthropicRoute.listModels);
// OCR API
asyncApiRoute(PST, "/api/ocr/process-note/:noteId", ocrRoute.processNoteOCR);
asyncApiRoute(PST, "/api/ocr/process-attachment/:attachmentId", ocrRoute.processAttachmentOCR);
asyncApiRoute(GET, "/api/ocr/search", ocrRoute.searchOCR);
asyncApiRoute(PST, "/api/ocr/batch-process", ocrRoute.batchProcessOCR);
asyncApiRoute(GET, "/api/ocr/batch-progress", ocrRoute.getBatchProgress);
asyncApiRoute(GET, "/api/ocr/stats", ocrRoute.getOCRStats);
asyncApiRoute(DEL, "/api/ocr/delete/:blobId", ocrRoute.deleteOCRResults);
asyncApiRoute(GET, "/api/ocr/notes/:noteId/text", ocrRoute.getNoteOCRText);
// API Documentation
apiDocsRoute(app);

View File

@@ -3,8 +3,8 @@ import build from "./build.js";
import packageJson from "../../package.json" with { type: "json" };
import dataDir from "./data_dir.js";
const APP_DB_VERSION = 233;
const SYNC_VERSION = 36;
const APP_DB_VERSION = 234;
const SYNC_VERSION = 37;
const CLIPPER_PROTOCOL_VERSION = "1.0";
export default {

View File

@@ -6,6 +6,9 @@ import becca from "../becca/becca.js";
import BAttribute from "../becca/entities/battribute.js";
import hiddenSubtreeService from "./hidden_subtree.js";
import oneTimeTimer from "./one_time_timer.js";
import ocrService from "./ocr/ocr_service.js";
import optionService from "./options.js";
import log from "./log.js";
import type BNote from "../becca/entities/bnote.js";
import type AbstractBeccaEntity from "../becca/entities/abstract_becca_entity.js";
import type { DefinitionObject } from "./promoted_attribute_definition_interface.js";
@@ -137,6 +140,25 @@ eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) =>
}
} else if (entityName === "notes") {
runAttachedRelations(entity, "runOnNoteCreation", entity);
// Note: OCR processing for images is now handled in image.ts during image processing
// OCR processing for files remains here since they don't go through image processing
// Only auto-process if both OCR is enabled and auto-processing is enabled
if (entity.type === 'file' && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages")) {
// Check if the file MIME type is supported by any OCR processor
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
if (entity.mime && supportedMimeTypes.includes(entity.mime)) {
// Process OCR asynchronously to avoid blocking note creation
ocrService.processNoteOCR(entity.noteId).then(result => {
if (result) {
log.info(`Automatically processed OCR for file note ${entity.noteId} with MIME type ${entity.mime}`);
}
}).catch(error => {
log.error(`Failed to automatically process OCR for file note ${entity.noteId}: ${error}`);
});
}
}
}
});

View File

@@ -12,8 +12,9 @@ import sanitizeFilename from "sanitize-filename";
import isSvg from "is-svg";
import isAnimated from "is-animated";
import htmlSanitizer from "./html_sanitizer.js";
import ocrService, { type OCRResult } from "./ocr/ocr_service.js";
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean) {
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean, noteId?: string) {
const compressImages = optionService.getOptionBool("compressImages");
const origImageFormat = await getImageType(uploadBuffer);
@@ -24,6 +25,42 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
shrinkImageSwitch = false;
}
// Schedule OCR processing in the background for best quality
// Only auto-process if both OCR is enabled and auto-processing is enabled
if (noteId && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) {
const imageMime = getImageMimeFromExtension(origImageFormat.ext);
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
if (supportedMimeTypes.includes(imageMime)) {
// Process OCR asynchronously without blocking image creation
setImmediate(async () => {
try {
const ocrResult = await ocrService.extractTextFromFile(uploadBuffer, imageMime);
if (ocrResult) {
// We need to get the entity again to get its blobId after it's been saved
// noteId could be either a note ID or attachment ID
const note = becca.getNote(noteId);
const attachment = becca.getAttachment(noteId);
let blobId: string | undefined;
if (note && note.blobId) {
blobId = note.blobId;
} else if (attachment && attachment.blobId) {
blobId = attachment.blobId;
}
if (blobId) {
await ocrService.storeOCRResult(blobId, ocrResult);
log.info(`Successfully processed OCR for image ${noteId} (${originalName})`);
}
}
} catch (error) {
log.error(`Failed to process OCR for image ${noteId}: ${error}`);
}
});
}
}
let finalImageBuffer;
let imageFormat;
@@ -72,7 +109,7 @@ function updateImage(noteId: string, uploadBuffer: Buffer, originalName: string)
note.setLabel("originalFileName", originalName);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, true).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, true, noteId).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
note.mime = getImageMimeFromExtension(imageFormat.ext);
note.save();
@@ -108,7 +145,7 @@ function saveImage(parentNoteId: string, uploadBuffer: Buffer, originalName: str
note.addLabel("originalFileName", originalName);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, shrinkImageSwitch).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, shrinkImageSwitch, note.noteId).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
note.mime = getImageMimeFromExtension(imageFormat.ext);
@@ -159,7 +196,7 @@ function saveImageToAttachment(noteId: string, uploadBuffer: Buffer, originalNam
}, 5000);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, !!shrinkImageSwitch).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, !!shrinkImageSwitch, attachment.attachmentId).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
// re-read, might be changed in the meantime
if (!attachment.attachmentId) {

View File

@@ -0,0 +1,916 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
// Tesseract.js test double: a single shared worker object whose methods are spies,
// and a createWorker() stub that resolves to that worker.
const mockWorker = {
recognize: vi.fn(),
terminate: vi.fn(),
reinitialize: vi.fn()
};
const mockTesseract = {
createWorker: vi.fn().mockResolvedValue(mockWorker)
};
vi.mock('tesseract.js', () => ({
default: mockTesseract
}));
// Test doubles for the service's collaborators (options, log, sql, becca).
// Their return values are configured per test or in the global beforeEach below.
const mockOptions = {
getOptionBool: vi.fn(),
getOption: vi.fn()
};
const mockLog = {
info: vi.fn(),
error: vi.fn()
};
const mockSql = {
execute: vi.fn(),
getRow: vi.fn(),
getRows: vi.fn()
};
const mockBecca = {
getNote: vi.fn(),
getAttachment: vi.fn()
};
// Register the doubles above as the default exports of the mocked modules.
vi.mock('../options.js', () => ({
default: mockOptions
}));
vi.mock('../log.js', () => ({
default: mockLog
}));
vi.mock('../sql.js', () => ({
default: mockSql
}));
vi.mock('../../becca/becca.js', () => ({
default: mockBecca
}));
// The service is imported dynamically inside beforeEach (after the vi.mock calls above)
// and stored here for use by the tests.
let ocrService: typeof import('./ocr_service.js').default;
beforeEach(async () => {
// Clear recorded calls on all mocks
vi.clearAllMocks();
// Default behaviour: boolean options are true, string options are 'eng',
// SQL writes succeed, and SQL reads return no data.
mockOptions.getOptionBool.mockReturnValue(true);
mockOptions.getOption.mockReturnValue('eng');
mockSql.execute.mockImplementation(() => ({ lastInsertRowid: 1 }));
mockSql.getRow.mockReturnValue(null);
mockSql.getRows.mockReturnValue([]);
// createWorker resolves to the shared mockWorker
mockTesseract.createWorker.mockImplementation(async () => {
return mockWorker;
});
// Dynamically import the service to ensure mocks are applied
const module = await import('./ocr_service.js');
ocrService = module.default; // It's an instance, not a class
// Reset the service's internal fields (reached via an `any` cast) so that state
// does not carry over from one test to the next.
(ocrService as any).isInitialized = false;
(ocrService as any).worker = null;
(ocrService as any).isProcessing = false;
(ocrService as any).batchProcessingState = {
inProgress: false,
total: 0,
processed: 0
};
});
// Restore mocks after every test.
afterEach(() => {
vi.restoreAllMocks();
});
describe('OCRService', () => {
describe('isOCREnabled', () => {
it('should return true when OCR is enabled in options', () => {
mockOptions.getOptionBool.mockReturnValue(true);
expect(ocrService.isOCREnabled()).toBe(true);
expect(mockOptions.getOptionBool).toHaveBeenCalledWith('ocrEnabled');
});
it('should return false when OCR is disabled in options', () => {
mockOptions.getOptionBool.mockReturnValue(false);
expect(ocrService.isOCREnabled()).toBe(false);
expect(mockOptions.getOptionBool).toHaveBeenCalledWith('ocrEnabled');
});
it('should return false when options throws an error', () => {
mockOptions.getOptionBool.mockImplementation(() => {
throw new Error('Options not available');
});
expect(ocrService.isOCREnabled()).toBe(false);
});
});
describe('isSupportedMimeType', () => {
it('should return true for supported image MIME types', () => {
expect(ocrService.isSupportedMimeType('image/jpeg')).toBe(true);
expect(ocrService.isSupportedMimeType('image/jpg')).toBe(true);
expect(ocrService.isSupportedMimeType('image/png')).toBe(true);
expect(ocrService.isSupportedMimeType('image/gif')).toBe(true);
expect(ocrService.isSupportedMimeType('image/bmp')).toBe(true);
expect(ocrService.isSupportedMimeType('image/tiff')).toBe(true);
});
it('should return false for unsupported MIME types', () => {
expect(ocrService.isSupportedMimeType('text/plain')).toBe(false);
expect(ocrService.isSupportedMimeType('application/pdf')).toBe(false);
expect(ocrService.isSupportedMimeType('video/mp4')).toBe(false);
expect(ocrService.isSupportedMimeType('audio/mp3')).toBe(false);
});
it('should handle null/undefined MIME types', () => {
expect(ocrService.isSupportedMimeType(null as any)).toBe(false);
expect(ocrService.isSupportedMimeType(undefined as any)).toBe(false);
expect(ocrService.isSupportedMimeType('')).toBe(false);
});
});
describe('initialize', () => {
it('should initialize Tesseract worker successfully', async () => {
await ocrService.initialize();
expect(mockTesseract.createWorker).toHaveBeenCalledWith('eng', 1, {
workerPath: expect.any(String),
corePath: expect.any(String),
logger: expect.any(Function)
});
expect(mockLog.info).toHaveBeenCalledWith('Initializing OCR service with Tesseract.js...');
expect(mockLog.info).toHaveBeenCalledWith('OCR service initialized successfully');
});
it('should not reinitialize if already initialized', async () => {
await ocrService.initialize();
mockTesseract.createWorker.mockClear();
await ocrService.initialize();
expect(mockTesseract.createWorker).not.toHaveBeenCalled();
});
it('should handle initialization errors', async () => {
const error = new Error('Tesseract initialization failed');
mockTesseract.createWorker.mockRejectedValue(error);
await expect(ocrService.initialize()).rejects.toThrow('Tesseract initialization failed');
expect(mockLog.error).toHaveBeenCalledWith('Failed to initialize OCR service: Error: Tesseract initialization failed');
});
});
describe('extractTextFromImage', () => {
    const imageBytes = Buffer.from('fake-image-data');

    /** Makes the mocked worker resolve `recognize()` with the given text and raw confidence. */
    const stubRecognition = (text: string, confidence: number) => {
        mockWorker.recognize.mockResolvedValue({ data: { text, confidence } });
    };

    beforeEach(async () => {
        await ocrService.initialize();
        // Inject the mocked worker directly in case initialize() did not wire it up.
        (ocrService as any).worker = mockWorker;
    });

    it('should extract text successfully with default options', async () => {
        stubRecognition('Extracted text from image', 95);

        const extraction = await ocrService.extractTextFromImage(imageBytes);

        // The raw confidence of 95 is expected back as the fraction 0.95.
        expect(extraction).toEqual({
            text: 'Extracted text from image',
            confidence: 0.95,
            extractedAt: expect.any(String),
            language: 'eng'
        });
        expect(mockWorker.recognize).toHaveBeenCalledWith(imageBytes);
    });

    it('should extract text with custom language', async () => {
        stubRecognition('French text', 88);

        const extraction = await ocrService.extractTextFromImage(imageBytes, { language: 'fra' });

        expect(extraction.language).toBe('fra');
        // A language switch is expected to terminate the worker and create one for 'fra'.
        expect(mockWorker.terminate).toHaveBeenCalled();
        expect(mockTesseract.createWorker).toHaveBeenCalledWith('fra', 1, expect.any(Object));
    });

    it('should handle OCR recognition errors', async () => {
        mockWorker.recognize.mockRejectedValue(new Error('OCR recognition failed'));

        const pendingExtraction = ocrService.extractTextFromImage(imageBytes);

        await expect(pendingExtraction).rejects.toThrow('OCR recognition failed');
        expect(mockLog.error).toHaveBeenCalledWith(
            'OCR text extraction failed: Error: OCR recognition failed'
        );
    });

    it('should handle empty or low-confidence results', async () => {
        // Whitespace-only text is expected to come back as an empty string.
        stubRecognition(' ', 15);

        const extraction = await ocrService.extractTextFromImage(imageBytes);

        expect(extraction.text).toBe('');
        expect(extraction.confidence).toBe(0.15);
    });
});
describe('storeOCRResult', () => {
    /** Builds a fresh OCR result so no test can leak mutations into another. */
    const buildOcrResult = () => ({
        text: 'Sample text',
        confidence: 0.95,
        extractedAt: '2025-06-10T10:00:00.000Z',
        language: 'eng'
    });

    it('should store OCR result in blob successfully', async () => {
        await ocrService.storeOCRResult('blob123', buildOcrResult());

        // The service updates ocr_text and ocr_last_processed in one statement, so it
        // binds three values: the text, the processing timestamp and the blobId.
        // The SQL spans several lines, hence the whitespace-tolerant pattern.
        expect(mockSql.execute).toHaveBeenCalledWith(
            expect.stringMatching(/UPDATE blobs SET\s+ocr_text = \?/),
            ['Sample text', expect.any(String), 'blob123']
        );
    });

    it('should handle undefined blobId gracefully', async () => {
        await ocrService.storeOCRResult(undefined, buildOcrResult());

        // Without a blobId nothing may be written; the service only logs the problem.
        expect(mockSql.execute).not.toHaveBeenCalled();
        expect(mockLog.error).toHaveBeenCalledWith('Cannot store OCR result: blobId is undefined');
    });

    it('should handle database update errors', async () => {
        const error = new Error('Database error');
        mockSql.execute.mockImplementation(() => {
            throw error;
        });

        // The failure must be logged with the blob id and then propagated to the caller.
        await expect(ocrService.storeOCRResult('blob123', buildOcrResult())).rejects.toThrow('Database error');
        expect(mockLog.error).toHaveBeenCalledWith('Failed to store OCR result for blob blob123: Error: Database error');
    });
});
describe('processNoteOCR', () => {
    const mockNote = {
        noteId: 'note123',
        type: 'image',
        mime: 'image/jpeg',
        blobId: 'blob123',
        getContent: vi.fn()
    };

    beforeEach(() => {
        // Restore the MIME type: the unsupported-MIME test overrides it on this shared
        // fixture, which would otherwise make the suite order-dependent.
        mockNote.mime = 'image/jpeg';
        mockBecca.getNote.mockReturnValue(mockNote);
        mockNote.getContent.mockReturnValue(Buffer.from('fake-image-data'));
    });

    it('should process note OCR successfully', async () => {
        // Ensure getRow returns null for all calls in this test
        mockSql.getRow.mockImplementation(() => null);
        const mockOCRResult = {
            data: {
                text: 'Note image text',
                confidence: 90
            }
        };
        await ocrService.initialize();
        // Manually set the worker since mocking might not do it properly
        (ocrService as any).worker = mockWorker;
        mockWorker.recognize.mockResolvedValue(mockOCRResult);

        const result = await ocrService.processNoteOCR('note123');

        expect(result).toEqual({
            text: 'Note image text',
            confidence: 0.9,
            extractedAt: expect.any(String),
            language: 'eng'
        });
        expect(mockBecca.getNote).toHaveBeenCalledWith('note123');
        expect(mockNote.getContent).toHaveBeenCalled();
    });

    it('should return existing OCR result if forceReprocess is false', async () => {
        // The cached result is only reused when the blob has not been modified since
        // the last OCR run, so the row must carry both timestamps with the OCR run
        // being the more recent one.
        const existingResult = {
            ocr_text: 'Existing text',
            utcDateModified: '2025-06-10T10:00:00.000Z',
            ocr_last_processed: '2025-06-10T11:00:00.000Z'
        };
        mockSql.getRow.mockReturnValue(existingResult);

        const result = await ocrService.processNoteOCR('note123');

        expect(result).toEqual({
            text: 'Existing text',
            confidence: 0.95,
            language: 'eng',
            extractedAt: expect.any(String)
        });
        expect(mockNote.getContent).not.toHaveBeenCalled();
    });

    it('should reprocess if forceReprocess is true', async () => {
        const existingResult = {
            ocr_text: 'Existing text'
        };
        // sql.getRow is synchronous, so the row is returned directly rather than
        // wrapped in a promise.
        mockSql.getRow.mockReturnValue(existingResult);
        await ocrService.initialize();
        // Manually set the worker since mocking might not do it properly
        (ocrService as any).worker = mockWorker;
        const mockOCRResult = {
            data: {
                text: 'New processed text',
                confidence: 95
            }
        };
        mockWorker.recognize.mockResolvedValue(mockOCRResult);

        const result = await ocrService.processNoteOCR('note123', { forceReprocess: true });

        expect(result?.text).toBe('New processed text');
        expect(mockNote.getContent).toHaveBeenCalled();
    });

    it('should return null for non-existent note', async () => {
        mockBecca.getNote.mockReturnValue(null);

        const result = await ocrService.processNoteOCR('nonexistent');

        expect(result).toBe(null);
        expect(mockLog.error).toHaveBeenCalledWith('Note nonexistent not found');
    });

    it('should return null for unsupported MIME type', async () => {
        mockNote.mime = 'text/plain';

        const result = await ocrService.processNoteOCR('note123');

        expect(result).toBe(null);
        // The fixture is an image note, for which the service logs the "Image note" message.
        expect(mockLog.info).toHaveBeenCalledWith('Image note note123 has unsupported MIME type text/plain, skipping OCR');
    });
});
describe('processAttachmentOCR', () => {
    // Shared image attachment handed out by the mocked becca for every lookup.
    const attachmentFixture = {
        attachmentId: 'attach123',
        role: 'image',
        mime: 'image/png',
        blobId: 'blob456',
        getContent: vi.fn()
    };

    beforeEach(() => {
        mockBecca.getAttachment.mockReturnValue(attachmentFixture);
        attachmentFixture.getContent.mockReturnValue(Buffer.from('fake-image-data'));
    });

    it('should process attachment OCR successfully', async () => {
        // No stored OCR row for any lookup made during this test.
        mockSql.getRow.mockImplementation(() => null);
        await ocrService.initialize();
        // Inject the mocked worker directly in case initialize() did not wire it up.
        (ocrService as any).worker = mockWorker;
        mockWorker.recognize.mockResolvedValue({
            data: { text: 'Attachment image text', confidence: 92 }
        });

        const outcome = await ocrService.processAttachmentOCR('attach123');

        expect(outcome).toEqual({
            text: 'Attachment image text',
            confidence: 0.92,
            extractedAt: expect.any(String),
            language: 'eng'
        });
        expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach123');
    });

    it('should return null for non-existent attachment', async () => {
        mockBecca.getAttachment.mockReturnValue(null);

        const outcome = await ocrService.processAttachmentOCR('nonexistent');

        expect(outcome).toBe(null);
        expect(mockLog.error).toHaveBeenCalledWith('Attachment nonexistent not found');
    });
});
describe('searchOCRResults', () => {
    it('should search OCR results successfully', () => {
        // One matching row as it would come back from the blobs table.
        mockSql.getRows.mockReturnValue([
            { blobId: 'blob1', ocr_text: 'Sample search text' }
        ]);

        const matches = ocrService.searchOCRResults('search');

        // Rows are expected to be mapped from `ocr_text` to `text`.
        expect(matches).toEqual([
            { blobId: 'blob1', text: 'Sample search text' }
        ]);
        expect(mockSql.getRows).toHaveBeenCalledWith(
            expect.stringContaining('WHERE ocr_text LIKE ?'),
            ['%search%']
        );
    });

    it('should handle search errors gracefully', () => {
        const failingQuery = () => {
            throw new Error('Database error');
        };
        mockSql.getRows.mockImplementation(failingQuery);

        const matches = ocrService.searchOCRResults('search');

        // A failing query yields an empty list plus a logged error, not an exception.
        expect(matches).toEqual([]);
        expect(mockLog.error).toHaveBeenCalledWith('Failed to search OCR results: Error: Database error');
    });
});
describe('getOCRStats', () => {
    it('should return OCR statistics successfully', () => {
        // One-shot values are consumed in call order:
        // overall total, then image notes, then image attachments.
        mockSql.getRow
            .mockReturnValueOnce({ total_processed: 150 })
            .mockReturnValueOnce({ count: 100 })
            .mockReturnValueOnce({ count: 50 });

        const summary = ocrService.getOCRStats();

        expect(summary).toEqual({
            totalProcessed: 150,
            imageNotes: 100,
            imageAttachments: 50
        });
    });

    it('should handle missing statistics gracefully', () => {
        // Every query yields no row at all; each counter should fall back to zero.
        mockSql.getRow.mockReturnValue(null);

        const summary = ocrService.getOCRStats();

        expect(summary).toEqual({
            totalProcessed: 0,
            imageNotes: 0,
            imageAttachments: 0
        });
    });
});
describe('Batch Processing', () => {
// Covers ocrService.startBatchProcessing(): successful start, double start,
// OCR disabled, nothing to process, and a failing database query.
// The one-shot mock values below are consumed in call order, so their order matters.
describe('startBatchProcessing', () => {
beforeEach(() => {
// Reset batch processing state
ocrService.cancelBatchProcessing();
});
it('should start batch processing when images are available', async () => {
mockSql.getRow.mockReturnValueOnce({ count: 5 }); // image notes
mockSql.getRow.mockReturnValueOnce({ count: 3 }); // image attachments
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({ success: true });
expect(mockSql.getRow).toHaveBeenCalledTimes(2);
});
it('should return error if batch processing already in progress', async () => {
// Start first batch
mockSql.getRow.mockReturnValueOnce({ count: 5 });
mockSql.getRow.mockReturnValueOnce({ count: 3 });
// Mock background processing queries
const mockImageNotes = Array.from({length: 5}, (_, i) => ({
noteId: `note${i}`,
mime: 'image/jpeg'
}));
mockSql.getRows.mockReturnValueOnce(mockImageNotes);
mockSql.getRows.mockReturnValueOnce([]);
// Start without awaiting to keep it in progress
const firstStart = ocrService.startBatchProcessing();
// Try to start second batch immediately
const result = await ocrService.startBatchProcessing();
// Clean up by awaiting the first one
await firstStart;
expect(result).toEqual({
success: false,
message: 'Batch processing already in progress'
});
});
it('should return error if OCR is disabled', async () => {
// Simulate the OCR option being switched off.
mockOptions.getOptionBool.mockReturnValue(false);
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({
success: false,
message: 'OCR is disabled'
});
});
it('should return error if no images need processing', async () => {
mockSql.getRow.mockReturnValueOnce({ count: 0 }); // image notes
mockSql.getRow.mockReturnValueOnce({ count: 0 }); // image attachments
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({
success: false,
message: 'No images found that need OCR processing'
});
});
it('should handle database errors gracefully', async () => {
const error = new Error('Database connection failed');
mockSql.getRow.mockImplementation(() => {
throw error;
});
// The failure is expected in the returned result (not as a rejection) and in the log.
const result = await ocrService.startBatchProcessing();
expect(result).toEqual({
success: false,
message: 'Database connection failed'
});
expect(mockLog.error).toHaveBeenCalledWith(
'Failed to start batch processing: Database connection failed'
);
});
});
// Covers ocrService.getBatchProgress(): the idle snapshot and the snapshot taken
// right after a batch has been started.
describe('getBatchProgress', () => {
it('should return initial progress state', () => {
const progress = ocrService.getBatchProgress();
expect(progress.inProgress).toBe(false);
expect(progress.total).toBe(0);
expect(progress.processed).toBe(0);
});
it('should return progress with percentage when total > 0', async () => {
// Start batch processing
mockSql.getRow.mockReturnValueOnce({ count: 10 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
// Mock the background processing queries to return items that will take time to process
const mockImageNotes = Array.from({length: 10}, (_, i) => ({
noteId: `note${i}`,
mime: 'image/jpeg'
}));
mockSql.getRows.mockReturnValueOnce(mockImageNotes); // image notes query
mockSql.getRows.mockReturnValueOnce([]); // image attachments query
const startPromise = ocrService.startBatchProcessing();
// Check progress immediately after starting (before awaiting)
const progress = ocrService.getBatchProgress();
await startPromise;
// The snapshot was captured before any item could finish, hence 0 processed / 0 %.
expect(progress.inProgress).toBe(true);
expect(progress.total).toBe(10);
expect(progress.processed).toBe(0);
expect(progress.percentage).toBe(0);
expect(progress.startTime).toBeInstanceOf(Date);
});
});
// Covers ocrService.cancelBatchProcessing(): cancelling a running batch and
// calling it while nothing is running.
describe('cancelBatchProcessing', () => {
it('should cancel ongoing batch processing', async () => {
// Start batch processing
mockSql.getRow.mockReturnValueOnce({ count: 5 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
// Mock background processing queries
const mockImageNotes = Array.from({length: 5}, (_, i) => ({
noteId: `note${i}`,
mime: 'image/jpeg'
}));
mockSql.getRows.mockReturnValueOnce(mockImageNotes);
mockSql.getRows.mockReturnValueOnce([]);
const startPromise = ocrService.startBatchProcessing();
// The in-progress flag is checked before the start promise is awaited.
expect(ocrService.getBatchProgress().inProgress).toBe(true);
await startPromise;
ocrService.cancelBatchProcessing();
expect(ocrService.getBatchProgress().inProgress).toBe(false);
expect(mockLog.info).toHaveBeenCalledWith('Batch OCR processing cancelled');
});
it('should do nothing if no batch processing is running', () => {
ocrService.cancelBatchProcessing();
// No cancellation message is expected when there was nothing to cancel.
expect(mockLog.info).not.toHaveBeenCalledWith('Batch OCR processing cancelled');
});
});
// Exercises the background loop indirectly through startBatchProcessing().
// These tests wait on real timers, and the one-shot mock values are consumed in
// call order, so statement order inside each test matters.
describe('processBatchInBackground', () => {
beforeEach(async () => {
await ocrService.initialize();
});
it('should process image notes and attachments in sequence', async () => {
// Clear all mocks at the start of this test to ensure clean state
vi.clearAllMocks();
// Reinitialize OCR service after clearing mocks
await ocrService.initialize();
(ocrService as any).worker = mockWorker;
// Mock data for batch processing
const imageNotes = [
{ noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' },
{ noteId: 'note2', mime: 'image/png', blobId: 'blob2' }
];
const imageAttachments = [
{ attachmentId: 'attach1', mime: 'image/gif', blobId: 'blob3' }
];
// Setup mocks for startBatchProcessing
mockSql.getRow.mockReturnValueOnce({ count: 2 }); // image notes count
mockSql.getRow.mockReturnValueOnce({ count: 1 }); // image attachments count
// Setup mocks for background processing
mockSql.getRows.mockReturnValueOnce(imageNotes); // image notes query
mockSql.getRows.mockReturnValueOnce(imageAttachments); // image attachments query
// Mock successful OCR processing
mockWorker.recognize.mockResolvedValue({
data: { text: 'Test text', confidence: 95 }
});
// Mock notes and attachments
const mockNote1 = {
noteId: 'note1',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob1',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
const mockNote2 = {
noteId: 'note2',
type: 'image',
mime: 'image/png',
blobId: 'blob2',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
const mockAttachment = {
attachmentId: 'attach1',
role: 'image',
mime: 'image/gif',
blobId: 'blob3',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
// Resolve each note id to its own fixture; unknown ids resolve to null.
mockBecca.getNote.mockImplementation((noteId) => {
if (noteId === 'note1') return mockNote1;
if (noteId === 'note2') return mockNote2;
return null;
});
mockBecca.getAttachment.mockReturnValue(mockAttachment);
// Fallback used once the two one-shot count values above have been consumed.
mockSql.getRow.mockReturnValue(null); // No existing OCR results
// Start batch processing
await ocrService.startBatchProcessing();
// Wait for background processing to complete
// Need to wait longer since there's a 500ms delay between each item in batch processing
await new Promise(resolve => setTimeout(resolve, 2000));
// Verify notes and attachments were processed
expect(mockBecca.getNote).toHaveBeenCalledWith('note1');
expect(mockBecca.getNote).toHaveBeenCalledWith('note2');
expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach1');
});
it('should handle processing errors gracefully', async () => {
const imageNotes = [
{ noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' }
];
// Setup mocks for startBatchProcessing
mockSql.getRow.mockReturnValueOnce({ count: 1 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
// Setup mocks for background processing
mockSql.getRows.mockReturnValueOnce(imageNotes);
mockSql.getRows.mockReturnValueOnce([]);
// Mock note that will cause an error
const mockNote = {
noteId: 'note1',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob1',
getContent: vi.fn().mockImplementation(() => { throw new Error('Failed to get content'); })
};
mockBecca.getNote.mockReturnValue(mockNote);
mockSql.getRow.mockReturnValue(null);
// Start batch processing
await ocrService.startBatchProcessing();
// Wait for background processing to complete
await new Promise(resolve => setTimeout(resolve, 100));
// Verify error was logged but processing continued
expect(mockLog.error).toHaveBeenCalledWith(
expect.stringContaining('Failed to process OCR for note note1')
);
});
it('should stop processing when cancelled', async () => {
const imageNotes = [
{ noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' },
{ noteId: 'note2', mime: 'image/png', blobId: 'blob2' }
];
// Setup mocks
mockSql.getRow.mockReturnValueOnce({ count: 2 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
mockSql.getRows.mockReturnValueOnce(imageNotes);
mockSql.getRows.mockReturnValueOnce([]);
// Start batch processing
await ocrService.startBatchProcessing();
// Cancel immediately
ocrService.cancelBatchProcessing();
// Wait for background processing to complete
await new Promise(resolve => setTimeout(resolve, 100));
// Verify processing was stopped early
expect(ocrService.getBatchProgress().inProgress).toBe(false);
});
it('should skip unsupported MIME types', async () => {
const imageNotes = [
{ noteId: 'note1', mime: 'text/plain', blobId: 'blob1' }, // unsupported
{ noteId: 'note2', mime: 'image/jpeg', blobId: 'blob2' } // supported
];
// Setup mocks
mockSql.getRow.mockReturnValueOnce({ count: 2 });
mockSql.getRow.mockReturnValueOnce({ count: 0 });
mockSql.getRows.mockReturnValueOnce(imageNotes);
mockSql.getRows.mockReturnValueOnce([]);
const mockNote = {
noteId: 'note2',
type: 'image',
mime: 'image/jpeg',
blobId: 'blob2',
getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
};
mockBecca.getNote.mockReturnValue(mockNote);
mockSql.getRow.mockReturnValue(null);
mockWorker.recognize.mockResolvedValue({
data: { text: 'Test text', confidence: 95 }
});
// Start batch processing
await ocrService.startBatchProcessing();
// Wait for background processing to complete
await new Promise(resolve => setTimeout(resolve, 100));
// Verify only supported MIME type was processed
expect(mockBecca.getNote).toHaveBeenCalledWith('note2');
expect(mockBecca.getNote).not.toHaveBeenCalledWith('note1');
});
});
});
describe('deleteOCRResult', () => {
    const targetBlobId = 'blob123';

    it('should delete OCR result successfully', () => {
        ocrService.deleteOCRResult(targetBlobId);

        // Deletion is an UPDATE that nulls the OCR text of the given blob.
        expect(mockSql.execute).toHaveBeenCalledWith(
            expect.stringContaining('UPDATE blobs SET ocr_text = NULL'),
            [targetBlobId]
        );
        expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for blob blob123');
    });

    it('should handle deletion errors', () => {
        mockSql.execute.mockImplementation(() => {
            throw new Error('Database error');
        });
        const attemptDelete = () => ocrService.deleteOCRResult(targetBlobId);

        // The database error is logged and then rethrown to the caller.
        expect(attemptDelete).toThrow('Database error');
        expect(mockLog.error).toHaveBeenCalledWith(
            'Failed to delete OCR result for blob blob123: Error: Database error'
        );
    });
});
describe('isCurrentlyProcessing', () => {
    it('should return false initially', () => {
        expect(ocrService.isCurrentlyProcessing()).toBe(false);
    });

    it('should return true during processing', async () => {
        mockBecca.getNote.mockReturnValue({
            noteId: 'note123',
            // processNoteOCR only handles 'image' and 'file' notes; without a type the
            // call would return early and the in-flight assertion below would never run.
            type: 'image',
            mime: 'image/jpeg',
            blobId: 'blob123',
            getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
        });
        // sql.getRow is synchronous, so "no stored OCR" is a plain null, not a promise.
        mockSql.getRow.mockReturnValue(null);
        await ocrService.initialize();

        // Observe the flag from inside the recognition call, i.e. while OCR is in flight.
        mockWorker.recognize.mockImplementation(() => {
            expect(ocrService.isCurrentlyProcessing()).toBe(true);
            return Promise.resolve({
                data: { text: 'test', confidence: 90 }
            });
        });

        await ocrService.processNoteOCR('note123');

        // Once processing has finished the flag must be cleared again.
        expect(ocrService.isCurrentlyProcessing()).toBe(false);
    });
});
describe('cleanup', () => {
    const cleanupMessage = 'OCR service cleaned up';

    it('should terminate worker on cleanup', async () => {
        await ocrService.initialize();
        // Inject the mocked worker directly in case initialize() did not wire it up.
        (ocrService as any).worker = mockWorker;

        await ocrService.cleanup();

        expect(mockWorker.terminate).toHaveBeenCalled();
        expect(mockLog.info).toHaveBeenCalledWith(cleanupMessage);
    });

    it('should handle cleanup when worker is not initialized', async () => {
        await ocrService.cleanup();

        // With no worker there is nothing to terminate, but the cleanup is still logged.
        expect(mockWorker.terminate).not.toHaveBeenCalled();
        expect(mockLog.info).toHaveBeenCalledWith(cleanupMessage);
    });
});
});

View File

@@ -0,0 +1,752 @@
import Tesseract from 'tesseract.js';
import log from '../log.js';
import sql from '../sql.js';
import becca from '../../becca/becca.js';
import options from '../options.js';
import { ImageProcessor } from './processors/image_processor.js';
import { PDFProcessor } from './processors/pdf_processor.js';
import { TIFFProcessor } from './processors/tiff_processor.js';
import { OfficeProcessor } from './processors/office_processor.js';
import { FileProcessor } from './processors/file_processor.js';
/**
 * Outcome of one OCR extraction, as returned by the OCR service.
 */
export interface OCRResult {
/** Text recognised in the source file. */
text: string;
/**
 * Recognition confidence.
 * NOTE(review): this file uses a fractional value (0.95) for cached results — confirm
 * that the processors report the same scale.
 */
confidence: number;
/** Timestamp string; this file produces it with `Date.prototype.toISOString()`. */
extractedAt: string;
/** Language of the recognised text; this file uses 'eng' for cached results. */
language?: string;
/** NOTE(review): not read in this file — presumably set by multi-page processors; confirm. */
pageCount?: number;
}
/**
 * Options accepted by the OCR entry points and forwarded unchanged to the file processors.
 */
export interface OCRProcessingOptions {
/** NOTE(review): not read in this file — presumably the recognition language code; confirm in the processors. */
language?: string;
/** When true, a stored OCR result is ignored and the file is processed again. */
forceReprocess?: boolean;
/** NOTE(review): not read in this file — presumably a confidence threshold; confirm in the processors. */
confidence?: number;
/** NOTE(review): not read in this file — presumably consumed by the PDF processor; confirm. */
enablePDFTextExtraction?: boolean;
}
/**
 * Row shape used when reading OCR data from the `blobs` table.
 */
interface OCRBlobRow {
/** Identifier of the blob the OCR text belongs to. */
blobId: string;
/** OCR text stored for the blob. */
ocr_text: string;
/** Timestamp written alongside the text when an OCR result is stored. */
ocr_last_processed?: string;
}
/**
* OCR Service for extracting text from images and other OCR-able objects
* Uses Tesseract.js for text recognition
*/
class OCRService {
/** Tesseract worker owned by the service; terminated and reset by `cleanup()`. */
private worker: Tesseract.Worker | null = null;
/** True while `extractTextFromFile()` is running. */
private isProcessing = false;
/** Registered file processors, keyed by file family and queried in insertion order. */
private processors: Map<string, FileProcessor> = new Map();

/**
 * Registers the built-in processors. Registration order is also lookup order.
 */
constructor() {
    this.processors
        .set('image', new ImageProcessor())
        .set('pdf', new PDFProcessor())
        .set('tiff', new TIFFProcessor())
        .set('office', new OfficeProcessor());
}
/**
 * Reads the `ocrEnabled` option.
 *
 * @returns the option value, or `false` when reading the option throws (the error is logged)
 */
isOCREnabled(): boolean {
    let enabled = false;
    try {
        enabled = options.getOptionBool('ocrEnabled');
    } catch (error) {
        // An unreadable option is treated the same as OCR being switched off.
        log.error(`Failed to check OCR enabled status: ${error}`);
    }
    return enabled;
}
/**
 * Tells whether a MIME type is one of the image types accepted for OCR.
 * The comparison is case-insensitive.
 *
 * @param mimeType - MIME type to check
 * @returns `true` for a supported image MIME type; `false` otherwise, including for
 *          empty or non-string input
 */
isSupportedMimeType(mimeType: string): boolean {
    // Guard against callers handing over missing or non-string values at runtime.
    if (typeof mimeType !== 'string' || mimeType.length === 0) {
        return false;
    }

    const ocrImageTypes = new Set([
        'image/jpeg',
        'image/jpg',
        'image/png',
        'image/gif',
        'image/bmp',
        'image/tiff',
        'image/webp'
    ]);
    return ocrImageTypes.has(mimeType.toLowerCase());
}
/**
 * Runs OCR on a file buffer with the first registered processor that accepts the MIME type.
 *
 * The `isProcessing` flag is raised for the duration of the extraction and is always
 * lowered again, whether the extraction succeeds or fails.
 *
 * @param fileBuffer - raw file content
 * @param mimeType - MIME type used to select the processor
 * @param options - forwarded unchanged to the processor
 * @returns the result produced by the processor
 * @throws when no processor accepts `mimeType` or the processor fails; the error is
 *         logged and then rethrown
 */
async extractTextFromFile(fileBuffer: Buffer, mimeType: string, options: OCRProcessingOptions = {}): Promise<OCRResult> {
    try {
        log.info(`Starting OCR text extraction for MIME type: ${mimeType}`);
        this.isProcessing = true;

        const handler = this.getProcessorForMimeType(mimeType);
        if (!handler) {
            throw new Error(`No processor found for MIME type: ${mimeType}`);
        }

        const extraction = await handler.extractText(fileBuffer, options);
        const { confidence, text } = extraction;
        log.info(`OCR extraction completed. Confidence: ${confidence}%, Text length: ${text.length}`);
        return extraction;
    } catch (failure) {
        log.error(`OCR text extraction failed: ${failure}`);
        throw failure;
    } finally {
        this.isProcessing = false;
    }
}
/**
 * Runs OCR for a single note and stores the extracted text on the note's blob.
 *
 * Only `image` notes with a supported image MIME type and `file` notes whose MIME type
 * is accepted by a registered processor are handled. A stored result is returned as-is
 * unless `options.forceReprocess` is set or the blob needs reprocessing.
 *
 * @param noteId - id of the note to process
 * @param options - processing options; also forwarded to the extraction
 * @returns the OCR result, or `null` when OCR is disabled, the note does not exist, or
 *          the note is not eligible for OCR
 * @throws when the note content is not a Buffer, or when extraction or storing fails;
 *         the error is logged and then rethrown
 */
async processNoteOCR(noteId: string, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
    if (!this.isOCREnabled()) {
        log.info('OCR is disabled in settings');
        return null;
    }

    const note = becca.getNote(noteId);
    if (!note) {
        log.error(`Note ${noteId} not found`);
        return null;
    }

    // Eligibility depends on the note type: images use the image MIME list,
    // files need a registered processor, everything else is skipped.
    switch (note.type) {
        case 'image':
            if (!this.isSupportedMimeType(note.mime)) {
                log.info(`Image note ${noteId} has unsupported MIME type ${note.mime}, skipping OCR`);
                return null;
            }
            break;
        case 'file':
            if (!this.getProcessorForMimeType(note.mime)) {
                log.info(`File note ${noteId} has unsupported MIME type ${note.mime} for OCR, skipping`);
                return null;
            }
            break;
        default:
            log.info(`Note ${noteId} is not an image or file note, skipping OCR`);
            return null;
    }

    // Reuse the stored text when it exists, is still current and no reprocess was requested.
    const cached = this.getStoredOCRResult(note.blobId);
    if (cached && !options.forceReprocess && note.blobId && !this.needsReprocessing(note.blobId)) {
        log.info(`OCR already exists and is up-to-date for note ${noteId}, returning cached result`);
        return cached;
    }

    try {
        const content = note.getContent();
        if (!(content instanceof Buffer)) {
            throw new Error(`Cannot get image content for note ${noteId}`);
        }

        const extraction = await this.extractTextFromFile(content, note.mime, options);
        await this.storeOCRResult(note.blobId, extraction);
        return extraction;
    } catch (failure) {
        log.error(`Failed to process OCR for note ${noteId}: ${failure}`);
        throw failure;
    }
}
/**
 * Runs OCR for a single attachment and stores the extracted text on the attachment's blob.
 *
 * Only attachments with role `image` and a supported image MIME type, or role `file`
 * and a MIME type accepted by a registered processor, are handled. A stored result is
 * returned as-is unless `options.forceReprocess` is set or the blob needs reprocessing.
 *
 * @param attachmentId - id of the attachment to process
 * @param options - processing options; also forwarded to the extraction
 * @returns the OCR result, or `null` when OCR is disabled, the attachment does not
 *          exist, or the attachment is not eligible for OCR
 * @throws when the attachment content is not a Buffer, or when extraction or storing
 *         fails; the error is logged and then rethrown
 */
async processAttachmentOCR(attachmentId: string, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
    if (!this.isOCREnabled()) {
        log.info('OCR is disabled in settings');
        return null;
    }

    const attachment = becca.getAttachment(attachmentId);
    if (!attachment) {
        log.error(`Attachment ${attachmentId} not found`);
        return null;
    }

    // Eligibility depends on the role: images use the image MIME list,
    // files need a registered processor, everything else is skipped.
    switch (attachment.role) {
        case 'image':
            if (!this.isSupportedMimeType(attachment.mime)) {
                log.info(`Image attachment ${attachmentId} has unsupported MIME type ${attachment.mime}, skipping OCR`);
                return null;
            }
            break;
        case 'file':
            if (!this.getProcessorForMimeType(attachment.mime)) {
                log.info(`File attachment ${attachmentId} has unsupported MIME type ${attachment.mime} for OCR, skipping`);
                return null;
            }
            break;
        default:
            log.info(`Attachment ${attachmentId} is not an image or file, skipping OCR`);
            return null;
    }

    // Reuse the stored text when it exists, is still current and no reprocess was requested.
    const cached = this.getStoredOCRResult(attachment.blobId);
    if (cached && !options.forceReprocess && attachment.blobId && !this.needsReprocessing(attachment.blobId)) {
        log.info(`OCR already exists and is up-to-date for attachment ${attachmentId}, returning cached result`);
        return cached;
    }

    try {
        const content = attachment.getContent();
        if (!(content instanceof Buffer)) {
            throw new Error(`Cannot get image content for attachment ${attachmentId}`);
        }

        const extraction = await this.extractTextFromFile(content, attachment.mime, options);
        await this.storeOCRResult(attachment.blobId, extraction);
        return extraction;
    } catch (failure) {
        log.error(`Failed to process OCR for attachment ${attachmentId}: ${failure}`);
        throw failure;
    }
}
/**
 * Persists OCR text on a blob together with the time of processing.
 *
 * Only `ocrResult.text` is written; the other fields of the result are not stored.
 *
 * @param blobId - blob to update; when missing, an error is logged and nothing is written
 * @param ocrResult - result whose text is stored
 * @throws when the database update fails; the error is logged and then rethrown
 */
async storeOCRResult(blobId: string | undefined, ocrResult: OCRResult): Promise<void> {
    if (!blobId) {
        log.error('Cannot store OCR result: blobId is undefined');
        return;
    }

    try {
        // Bound in statement order: text, processing timestamp, target blob.
        const bindings = [ocrResult.text, new Date().toISOString(), blobId];
        sql.execute(`
UPDATE blobs SET
ocr_text = ?,
ocr_last_processed = ?
WHERE blobId = ?
`, bindings);
        log.info(`Stored OCR result for blob ${blobId}`);
    } catch (failure) {
        log.error(`Failed to store OCR result for blob ${blobId}: ${failure}`);
        throw failure;
    }
}
/**
 * Loads the OCR text stored on a blob and wraps it in an {@link OCRResult}.
 *
 * Only the text and the processing timestamp are persisted, so `confidence` and
 * `language` in the returned object are fixed placeholders, not the values of the
 * original extraction.
 *
 * @param blobId - blob to look up
 * @returns the stored result, or `null` when `blobId` is missing, the blob has no OCR
 *          text, or the lookup fails (the failure is logged)
 */
private getStoredOCRResult(blobId: string | undefined): OCRResult | null {
    if (!blobId) {
        return null;
    }

    try {
        const row = sql.getRow<{
            ocr_text: string | null;
            ocr_last_processed: string | null;
        }>(`
            SELECT ocr_text, ocr_last_processed
            FROM blobs
            WHERE blobId = ?
        `, [blobId]);

        if (!row || !row.ocr_text) {
            return null;
        }

        return {
            text: row.ocr_text,
            confidence: 0.95, // Placeholder: the real confidence is not persisted
            // Report when the text was actually extracted; fall back to "now" only for
            // rows that carry text but no processing timestamp.
            extractedAt: row.ocr_last_processed || new Date().toISOString(),
            language: 'eng' // Placeholder: the real language is not persisted
        };
    } catch (error) {
        log.error(`Failed to get OCR result for blob ${blobId}: ${error}`);
        return null;
    }
}
/**
 * Finds blobs whose stored OCR text contains the given text.
 *
 * The search text is matched literally: the LIKE wildcards `%` and `_` (and the escape
 * character itself) are escaped, so searching for e.g. "100%" does not match "1000".
 *
 * @param searchText - text to look for anywhere inside the OCR text
 * @returns matching blobs with their OCR text; an empty array when the query fails
 *          (the failure is logged)
 */
searchOCRResults(searchText: string): Array<{ blobId: string; text: string }> {
    try {
        // Neutralise LIKE metacharacters in user input; the backslash is declared as
        // the escape character in the query below.
        const literalText = searchText.replace(/[\\%_]/g, '\\$&');
        const query = `
            SELECT blobId, ocr_text
            FROM blobs
            WHERE ocr_text LIKE ? ESCAPE '\\'
            AND ocr_text IS NOT NULL
        `;
        const params = [`%${literalText}%`];
        const rows = sql.getRows<OCRBlobRow>(query, params);
        return rows.map(row => ({
            blobId: row.blobId,
            text: row.ocr_text
        }));
    } catch (error) {
        log.error(`Failed to search OCR results: ${error}`);
        return [];
    }
}
/**
 * Removes the stored OCR text of a blob by setting `ocr_text` to NULL.
 *
 * @param blobId - blob whose OCR text is cleared
 * @throws when the database update fails; the error is logged and then rethrown
 */
deleteOCRResult(blobId: string): void {
    const bindings = [blobId];
    try {
        sql.execute(`
UPDATE blobs SET ocr_text = NULL
WHERE blobId = ?
`, bindings);
        log.info(`Deleted OCR result for blob ${blobId}`);
    } catch (failure) {
        log.error(`Failed to delete OCR result for blob ${blobId}: ${failure}`);
        throw failure;
    }
}
/**
 * Convenience entry point that delegates to `processAllBlobsNeedingOCR()`.
 *
 * @returns a promise that settles when the delegated processing settles
 */
async processAllImages(): Promise<void> {
    await this.processAllBlobsNeedingOCR();
}
/**
 * Collects counters describing how much content currently has OCR text.
 *
 * @returns `totalProcessed` — blobs with non-empty OCR text; `imageNotes` — non-deleted
 *          image notes whose blob has OCR text; `imageAttachments` — non-deleted image
 *          attachments whose blob has OCR text. All counters are 0 when a query fails
 *          (the failure is logged) or yields no row.
 */
getOCRStats(): { totalProcessed: number; imageNotes: number; imageAttachments: number } {
    try {
        // Every blob that carries non-empty OCR text, regardless of what owns it.
        const blobTotals = sql.getRow<{ total_processed: number }>(`
SELECT COUNT(*) as total_processed
FROM blobs
WHERE ocr_text IS NOT NULL AND ocr_text != ''
`);

        // Live image notes backed by a blob with OCR text.
        const noteTotals = sql.getRow<{ count: number }>(`
SELECT COUNT(*) as count
FROM notes n
JOIN blobs b ON n.blobId = b.blobId
WHERE n.type = 'image'
AND n.isDeleted = 0
AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
`);

        // Live image attachments backed by a blob with OCR text.
        const attachmentTotals = sql.getRow<{ count: number }>(`
SELECT COUNT(*) as count
FROM attachments a
JOIN blobs b ON a.blobId = b.blobId
WHERE a.role = 'image'
AND a.isDeleted = 0
AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
`);

        const totalProcessed = blobTotals?.total_processed || 0;
        const imageNotes = noteTotals?.count || 0;
        const imageAttachments = attachmentTotals?.count || 0;
        return { totalProcessed, imageNotes, imageAttachments };
    } catch (failure) {
        log.error(`Failed to get OCR stats: ${failure}`);
        return { totalProcessed: 0, imageNotes: 0, imageAttachments: 0 };
    }
}
/**
 * Releases the Tesseract worker, if one is held, and logs the cleanup.
 */
async cleanup(): Promise<void> {
    const activeWorker = this.worker;
    if (activeWorker) {
        await activeWorker.terminate();
        this.worker = null;
    }
    log.info('OCR service cleaned up');
}
/**
 * Reports whether a text extraction is running right now.
 *
 * @returns the current value of the internal `isProcessing` flag
 */
isCurrentlyProcessing(): boolean {
    const { isProcessing } = this;
    return isProcessing;
}
// Batch processing state
// Shared by startBatchProcessing(), the background loop, getBatchProgress() and
// cancelBatchProcessing().
private batchProcessingState: {
// True while a batch is running; set to false to stop the background loop.
inProgress: boolean;
// Number of items in the current batch.
total: number;
// Items handled so far; failed items are counted as well.
processed: number;
// Set when a batch is started.
startTime?: Date;
} = {
inProgress: false,
total: 0,
processed: 0
};
/**
 * Starts OCR for every blob that still needs it and returns without waiting for the
 * work to finish; progress is available through `getBatchProgress()`.
 *
 * @returns `{ success: true }` once the batch is running; otherwise `success: false`
 *          with a message when a batch is already running, OCR is disabled, nothing
 *          needs processing, or collecting the work list fails
 */
async startBatchProcessing(): Promise<{ success: boolean; message?: string }> {
    if (this.batchProcessingState.inProgress) {
        return { success: false, message: 'Batch processing already in progress' };
    }
    if (!this.isOCREnabled()) {
        return { success: false, message: 'OCR is disabled' };
    }

    /** Turns whatever was thrown into a printable message. */
    const toMessage = (failure: unknown): string =>
        failure instanceof Error ? failure.message : String(failure);

    try {
        const pendingBlobs = this.getBlobsNeedingOCR();
        const pendingCount = pendingBlobs.length;
        if (pendingCount === 0) {
            return { success: false, message: 'No images found that need OCR processing' };
        }

        this.batchProcessingState = {
            inProgress: true,
            total: pendingCount,
            processed: 0,
            startTime: new Date()
        };

        // Deliberately not awaited: the batch keeps running after this method returns.
        this.processBatchInBackground(pendingBlobs).catch(failure => {
            log.error(`Batch processing failed: ${toMessage(failure)}`);
            this.batchProcessingState.inProgress = false;
        });

        return { success: true };
    } catch (failure) {
        const reason = toMessage(failure);
        log.error(`Failed to start batch processing: ${reason}`);
        return { success: false, message: reason };
    }
}
/**
 * Returns a copy of the current batch state.
 *
 * @returns the batch state, plus `percentage` (0-100) whenever `total` is greater than zero
 */
getBatchProgress(): { inProgress: boolean; total: number; processed: number; percentage?: number; startTime?: Date } {
    const state = this.batchProcessingState;
    if (state.total > 0) {
        return { ...state, percentage: (state.processed / state.total) * 100 };
    }
    // Nothing to divide by: leave the percentage out entirely.
    return { ...state };
}
/**
 * Works through a batch one item at a time, updating the shared batch state.
 *
 * The loop stops early once `inProgress` has been switched off. An item that fails is
 * logged and still counted as processed; a pause follows successfully processed items only.
 *
 * @param blobsToProcess - work list; `entityType` selects note or attachment processing
 *                         and `entityId` identifies the entity
 * @throws when the loop itself fails; `inProgress` is cleared before rethrowing
 */
private async processBatchInBackground(blobsToProcess: Array<{ blobId: string; mimeType: string; entityType: 'note' | 'attachment'; entityId: string }>): Promise<void> {
    // Pause after each successful item to prevent overwhelming the system.
    const pauseBetweenItemsMs = 500;

    try {
        log.info('Starting batch OCR processing...');

        for (const job of blobsToProcess) {
            if (!this.batchProcessingState.inProgress) {
                break; // cancelled from outside
            }

            try {
                await (job.entityType === 'note'
                    ? this.processNoteOCR(job.entityId)
                    : this.processAttachmentOCR(job.entityId));
                this.batchProcessingState.processed++;
                await new Promise(resolve => setTimeout(resolve, pauseBetweenItemsMs));
            } catch (itemFailure) {
                log.error(`Failed to process OCR for ${job.entityType} ${job.entityId}: ${itemFailure}`);
                // A failed item still counts towards progress.
                this.batchProcessingState.processed++;
            }
        }

        this.batchProcessingState.inProgress = false;
        log.info(`Batch OCR processing completed. Processed ${this.batchProcessingState.processed} files.`);
    } catch (batchFailure) {
        log.error(`Batch OCR processing failed: ${batchFailure}`);
        this.batchProcessingState.inProgress = false;
        throw batchFailure;
    }
}
/**
 * Requests the running batch to stop by clearing the in-progress flag.
 * Does nothing (and logs nothing) when no batch is running.
 */
cancelBatchProcessing(): void {
    if (!this.batchProcessingState.inProgress) {
        return;
    }
    this.batchProcessingState.inProgress = false;
    log.info('Batch OCR processing cancelled');
}
/**
 * Looks up the first registered processor, in registration order, that can handle a MIME type.
 *
 * @param mimeType - MIME type to match
 * @returns the matching processor, or `null` when none accepts the type
 */
private getProcessorForMimeType(mimeType: string): FileProcessor | null {
    const registered = Array.from(this.processors.values());
    return registered.find(candidate => candidate.canProcess(mimeType)) ?? null;
}
/**
 * Collect the MIME types reported by every registered processor.
 *
 * @returns the de-duplicated union of each processor's `getSupportedMimeTypes()`
 */
getAllSupportedMimeTypes(): string[] {
    const uniqueTypes = new Set<string>();
    for (const processor of this.processors.values()) {
        for (const mimeType of processor.getSupportedMimeTypes()) {
            uniqueTypes.add(mimeType);
        }
    }
    return [...uniqueTypes];
}
/**
 * Tell whether at least one registered processor can handle the MIME type.
 *
 * @param mimeType MIME type to check; an empty value is never supported
 */
isSupportedByAnyProcessor(mimeType: string): boolean {
    return !!mimeType && this.getProcessorForMimeType(mimeType) !== null;
}
/**
 * Decide whether a blob should be run through OCR (again).
 *
 * @param blobId blob to check
 * @returns `true` when the blob has never been processed or was modified after its last
 *          OCR run; `false` for an empty id, an unknown blob, or when the lookup fails
 */
needsReprocessing(blobId: string): boolean {
    if (!blobId) {
        return false;
    }
    try {
        const row = sql.getRow<{
            utcDateModified: string;
            ocr_last_processed: string | null;
        }>(`
SELECT utcDateModified, ocr_last_processed
FROM blobs
WHERE blobId = ?
`, [blobId]);
        if (!row) {
            return false;
        }
        const { utcDateModified, ocr_last_processed: lastProcessed } = row;
        // No timestamp means OCR never ran for this blob
        if (!lastProcessed) {
            return true;
        }
        // Stale when the blob changed after the last OCR run
        return new Date(utcDateModified).getTime() > new Date(lastProcessed).getTime();
    } catch (lookupError) {
        log.error(`Failed to check if blob ${blobId} needs reprocessing: ${lookupError}`);
        return false;
    }
}
/**
 * Discard the stored OCR result of a blob by resetting `ocr_text` and `ocr_last_processed`
 * to NULL. An empty blob id is ignored.
 *
 * @param blobId blob whose OCR result should be cleared
 * @throws re-throws any error raised by the update after logging it
 */
invalidateOCRResult(blobId: string): void {
    if (blobId) {
        const clearStatement = `
UPDATE blobs SET
ocr_text = NULL,
ocr_last_processed = NULL
WHERE blobId = ?
`;
        try {
            sql.execute(clearStatement, [blobId]);
            log.info(`Invalidated OCR result for blob ${blobId}`);
        } catch (updateError) {
            log.error(`Failed to invalidate OCR result for blob ${blobId}: ${updateError}`);
            throw updateError;
        }
    }
}
/**
 * Get blobs that need OCR processing (never processed, or modified after the last OCR run).
 *
 * Candidates are non-deleted notes of type `image`, attachments with role `image`, and
 * notes/attachments of type/role `file` whose MIME type is reported by
 * `getAllSupportedMimeTypes()`. The MIME filter is taken from the registered processors
 * instead of a hand-maintained list, so the query cannot drift from what the processors accept.
 *
 * @returns one entry per note or attachment that needs processing; an empty array when the
 *          query fails (the error is logged)
 */
getBlobsNeedingOCR(): Array<{ blobId: string; mimeType: string; entityType: 'note' | 'attachment'; entityId: string }> {
    type PendingRow = { blobId: string; mimeType: string; entityId: string };

    try {
        const supportedMimeTypes = this.getAllSupportedMimeTypes();
        // One bound parameter per MIME type; values are never interpolated into the SQL text
        const mimePlaceholders = supportedMimeTypes.map(() => '?').join(', ');

        // Builds the "image OR (file AND mime IN (...))" condition. The file branch is dropped
        // when no processor reports any MIME type, since no file could be handled then.
        const eligibility = (imageCondition: string, fileCondition: string, mimeColumn: string): string =>
            supportedMimeTypes.length > 0
                ? `(${imageCondition} OR (${fileCondition} AND ${mimeColumn} IN (${mimePlaceholders})))`
                : `(${imageCondition})`;

        // Notes whose blob needs OCR
        const noteBlobs = sql.getRows<PendingRow>(`
            SELECT n.blobId, n.mime AS mimeType, n.noteId AS entityId
            FROM notes n
            JOIN blobs b ON n.blobId = b.blobId
            WHERE ${eligibility("n.type = 'image'", "n.type = 'file'", 'n.mime')}
              AND n.isDeleted = 0
              AND n.blobId IS NOT NULL
              AND (
                  b.ocr_last_processed IS NULL
                  OR b.utcDateModified > b.ocr_last_processed
              )
        `, supportedMimeTypes);

        // Attachments whose blob needs OCR
        const attachmentBlobs = sql.getRows<PendingRow>(`
            SELECT a.blobId, a.mime AS mimeType, a.attachmentId AS entityId
            FROM attachments a
            JOIN blobs b ON a.blobId = b.blobId
            WHERE ${eligibility("a.role = 'image'", "a.role = 'file'", 'a.mime')}
              AND a.isDeleted = 0
              AND a.blobId IS NOT NULL
              AND (
                  b.ocr_last_processed IS NULL
                  OR b.utcDateModified > b.ocr_last_processed
              )
        `, supportedMimeTypes);

        return [
            ...noteBlobs.map(blob => ({ ...blob, entityType: 'note' as const })),
            ...attachmentBlobs.map(blob => ({ ...blob, entityType: 'attachment' as const }))
        ];
    } catch (error) {
        log.error(`Failed to get blobs needing OCR: ${error}`);
        return [];
    }
}
/**
 * Auto-processing entry point: run OCR for every blob reported by `getBlobsNeedingOCR()`.
 *
 * Returns early when OCR is disabled or nothing is pending. A failure on one entity is
 * logged and the loop moves on to the next one.
 */
async processAllBlobsNeedingOCR(): Promise<void> {
    if (!this.isOCREnabled()) {
        log.info('OCR is disabled, skipping auto-processing');
        return;
    }
    const pending = this.getBlobsNeedingOCR();
    if (pending.length === 0) {
        log.info('No blobs need OCR processing');
        return;
    }
    log.info(`Auto-processing OCR for ${pending.length} blobs...`);
    for (const { entityType, entityId } of pending) {
        try {
            await (entityType === 'note'
                ? this.processNoteOCR(entityId)
                : this.processAttachmentOCR(entityId));
            // Short pause after each successful item to prevent overwhelming the system
            await new Promise(resolve => setTimeout(resolve, 100));
        } catch (itemError) {
            // Log and carry on with the remaining blobs
            log.error(`Failed to auto-process OCR for ${entityType} ${entityId}: ${itemError}`);
        }
    }
    log.info('Auto-processing OCR completed');
}
}
// The module's default export is a single OCRService instance created when the module is evaluated.
export default new OCRService();

View File

@@ -0,0 +1,33 @@
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
/**
 * Contract implemented by every processor that extracts text from a particular kind of file.
 */
export abstract class FileProcessor {
    /** Whether this processor is able to handle content of the given MIME type. */
    abstract canProcess(mimeType: string): boolean;

    /** Extract text from the raw file contents in `buffer`, using the given `options`. */
    abstract extractText(buffer: Buffer, options: OCRProcessingOptions): Promise<OCRResult>;

    /** Identifier of the kind of processing this processor performs. */
    abstract getProcessingType(): string;

    /** List of MIME types this processor supports. */
    abstract getSupportedMimeTypes(): string[];

    /**
     * Release any resources held by the processor. The base implementation holds none and
     * resolves immediately; subclasses override it when they own something to dispose of.
     */
    async cleanup(): Promise<void> {
        // Nothing to release at this level
    }
}

View File

@@ -0,0 +1,237 @@
import Tesseract from 'tesseract.js';
import { FileProcessor } from './file_processor.js';
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
import log from '../../log.js';
import options from '../../options.js';
/**
 * Image processor for extracting text from image files using Tesseract.
 *
 * One Tesseract worker is created lazily on first use and kept for later calls. It is replaced
 * only when a call asks for different language(s) than the ones the worker was created with.
 *
 * NOTE(review): the worker is shared instance state, so overlapping `extractText` calls that
 * request different languages could terminate a worker that is still in use - confirm that
 * callers run OCR sequentially.
 */
export class ImageProcessor extends FileProcessor {
    /** Matches a single Tesseract language code such as `eng`, `chi_sim`, `srp_latn` or `chi_sim_vert`. */
    private static readonly LANGUAGE_CODE_PATTERN = /^[a-zA-Z]{2,3}(?:_[a-zA-Z]{2,4}){0,2}$/;

    private worker: Tesseract.Worker | null = null;
    private isInitialized = false;
    /** Language string (e.g. `eng` or `ron+eng`) the current worker was created with. */
    private workerLanguage: string | null = null;
    private readonly supportedTypes = [
        'image/jpeg',
        'image/jpg',
        'image/png',
        'image/gif',
        'image/bmp',
        'image/tiff',
        'image/webp'
    ];

    /** Case-insensitive check against the supported image MIME types. */
    canProcess(mimeType: string): boolean {
        return this.supportedTypes.includes(mimeType.toLowerCase());
    }

    /** Returns a copy of the supported MIME type list. */
    getSupportedMimeTypes(): string[] {
        return [...this.supportedTypes];
    }

    /**
     * Run OCR on an image and return the recognised text.
     *
     * @param buffer raw image data
     * @param options `language` overrides the configured default; multi-language values such as
     *                `ron+eng` are supported
     * @returns the text that passed the confidence filter, with `confidence` on a 0-1 scale
     * @throws when the language is missing or malformed, the worker cannot be created, or
     *         recognition fails (the error is logged and re-thrown)
     */
    async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info('Starting image OCR text extraction...');

            const language = options.language || this.getDefaultOCRLanguage();
            if (!this.isValidLanguageFormat(language)) {
                throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
            }

            const worker = await this.getWorkerForLanguage(language);
            const result = await worker.recognize(buffer);

            // Drop words below the configured minimum confidence
            const { filteredText, overallConfidence } = this.filterTextByConfidence(result.data, options);
            const ocrResult: OCRResult = {
                text: filteredText,
                confidence: overallConfidence,
                extractedAt: new Date().toISOString(),
                language,
                pageCount: 1
            };

            // `confidence` is a 0-1 fraction; scale it so the "%" in the message is accurate
            log.info(`Image OCR extraction completed. Confidence: ${Math.round(ocrResult.confidence * 100)}%, Text length: ${ocrResult.text.length}`);
            return ocrResult;
        } catch (error) {
            log.error(`Image OCR text extraction failed: ${error}`);
            throw error;
        }
    }

    getProcessingType(): string {
        return 'image';
    }

    /**
     * Return a worker created for exactly `language`, reusing the current one when it matches
     * and replacing it otherwise.
     */
    private async getWorkerForLanguage(language: string): Promise<Tesseract.Worker> {
        if (this.worker && this.workerLanguage !== language) {
            // Reset the state before terminating so a failure below cannot leave a
            // terminated worker marked as initialized.
            const staleWorker = this.worker;
            this.worker = null;
            this.workerLanguage = null;
            this.isInitialized = false;
            await staleWorker.terminate();
        }

        if (!this.isInitialized) {
            await this.initialize(language);
        }

        if (!this.worker) {
            throw new Error('Image processor worker not initialized');
        }
        return this.worker;
    }

    /**
     * Create the Tesseract worker if there is none yet.
     *
     * @param language language(s) to load; defaults to the configured OCR language
     */
    private async initialize(language?: string): Promise<void> {
        if (this.isInitialized) {
            return;
        }

        try {
            const workerLanguage = language ?? this.getDefaultOCRLanguage();
            log.info('Initializing image OCR processor with Tesseract.js...');
            log.info(`Initializing Tesseract worker for language(s): ${workerLanguage}`);

            // Configure proper paths for Node.js environment
            const workerPath = require.resolve('tesseract.js/src/worker-script/node/index.js');
            const corePath = require.resolve('tesseract.js-core/tesseract-core.wasm.js');
            log.info(`Using worker path: ${workerPath}`);
            log.info(`Using core path: ${corePath}`);

            this.worker = await Tesseract.createWorker(workerLanguage, 1, {
                workerPath,
                corePath,
                logger: (m: { status: string; progress: number }) => {
                    if (m.status === 'recognizing text') {
                        log.info(`Image OCR progress (${workerLanguage}): ${Math.round(m.progress * 100)}%`);
                    }
                }
            });
            this.workerLanguage = workerLanguage;
            this.isInitialized = true;
            log.info('Image OCR processor initialized successfully');
        } catch (error) {
            log.error(`Failed to initialize image OCR processor: ${error}`);
            throw error;
        }
    }

    /** Terminate the worker (if any) and reset the processor to its uninitialized state. */
    async cleanup(): Promise<void> {
        if (this.worker) {
            await this.worker.terminate();
            this.worker = null;
        }
        this.workerLanguage = null;
        this.isInitialized = false;
        log.info('Image OCR processor cleaned up');
    }

    /**
     * Get default OCR language from options.
     *
     * @throws when the option cannot be read or is empty
     */
    private getDefaultOCRLanguage(): string {
        try {
            const ocrLanguage = options.getOption('ocrLanguage');
            if (!ocrLanguage) {
                throw new Error('OCR language not configured in user settings');
            }
            return ocrLanguage;
        } catch (error) {
            log.error(`Failed to get default OCR language: ${error}`);
            throw new Error('OCR language must be configured in settings before processing');
        }
    }

    /**
     * Filter recognised text by the minimum confidence threshold.
     *
     * With word-level data only words at or above the threshold are kept and the reported
     * confidence is their average. Without word-level data the whole text is kept or dropped
     * based on the overall confidence. Confidences from Tesseract are on a 0-100 scale and are
     * converted to 0-1 here.
     */
    private filterTextByConfidence(
        data: { text: string; confidence: number; words?: Array<{ text: string; confidence: number }> | null },
        _options: OCRProcessingOptions
    ): { filteredText: string; overallConfidence: number } {
        const minConfidence = this.getMinConfidenceThreshold();
        const overallConfidence = data.confidence / 100;

        // No threshold configured: return the text unfiltered
        if (minConfidence <= 0) {
            return { filteredText: data.text.trim(), overallConfidence };
        }

        // Fallback when word-level data is not available
        if (!Array.isArray(data.words)) {
            if (overallConfidence >= minConfidence) {
                return { filteredText: data.text.trim(), overallConfidence };
            }
            log.info(`Entire text filtered out due to low confidence ${overallConfidence} (below threshold ${minConfidence})`);
            return { filteredText: '', overallConfidence };
        }

        const keptWords = data.words.filter(word => word.confidence / 100 >= minConfidence);
        const averageConfidence = keptWords.length > 0
            ? keptWords.reduce((sum, word) => sum + word.confidence / 100, 0) / keptWords.length
            : 0;
        const filteredText = keptWords.map(word => word.text).join(' ').trim();

        log.info(`Filtered OCR text: ${keptWords.length} words kept out of ${data.words.length} total words (min confidence: ${minConfidence})`);
        return { filteredText, overallConfidence: averageConfidence };
    }

    /**
     * Get minimum confidence threshold (0-1) from options.
     * A missing or non-numeric value yields 0, which disables filtering instead of
     * silently discarding every word.
     */
    private getMinConfidenceThreshold(): number {
        const rawValue = options.getOption('ocrMinConfidence') ?? '0';
        const threshold = parseFloat(String(rawValue));
        return Number.isFinite(threshold) ? threshold : 0;
    }

    /**
     * Validate OCR language format.
     * Supports a single language (`eng`) or several joined with `+` (`ron+eng`). Each code is
     * 2-3 letters optionally followed by up to two `_suffix` parts of 2-4 letters, which covers
     * codes such as `chi_sim`, `srp_latn`, `deu_frak` and `chi_sim_vert`.
     */
    private isValidLanguageFormat(language: string): boolean {
        if (!language || typeof language !== 'string') {
            return false;
        }
        return language
            .split('+')
            .every(code => ImageProcessor.LANGUAGE_CODE_PATTERN.test(code.trim()));
    }
}

View File

@@ -0,0 +1,132 @@
import * as officeParser from 'officeparser';
import { FileProcessor } from './file_processor.js';
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
import { ImageProcessor } from './image_processor.js';
import log from '../../log.js';
/**
 * Office document processor that extracts the embedded text of DOCX/XLSX/PPTX (and the legacy
 * DOC/XLS/PPT/RTF) files through `officeparser`. No OCR is run on the document contents here.
 */
export class OfficeProcessor extends FileProcessor {
    /** Matches a single Tesseract language code such as `eng`, `chi_sim`, `srp_latn` or `chi_sim_vert`. */
    private static readonly LANGUAGE_CODE_PATTERN = /^[a-zA-Z]{2,3}(?:_[a-zA-Z]{2,4}){0,2}$/;

    private imageProcessor: ImageProcessor;
    private readonly supportedTypes = [
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document', // DOCX
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', // XLSX
        'application/vnd.openxmlformats-officedocument.presentationml.presentation', // PPTX
        'application/msword', // DOC
        'application/vnd.ms-excel', // XLS
        'application/vnd.ms-powerpoint', // PPT
        'application/rtf' // RTF
    ];

    constructor() {
        super();
        this.imageProcessor = new ImageProcessor();
    }

    /** Case-insensitive check, matching the behaviour of the image, PDF and TIFF processors. */
    canProcess(mimeType: string): boolean {
        return this.supportedTypes.includes(mimeType.toLowerCase());
    }

    /** Returns a copy of the supported MIME type list. */
    getSupportedMimeTypes(): string[] {
        return [...this.supportedTypes];
    }

    /**
     * Extract the text content of an Office document.
     *
     * @param buffer raw document data
     * @param options `language` overrides the configured default; it is validated and recorded
     *                on the result
     * @returns the trimmed document text with confidence 0.99, or empty text with confidence 0
     *          when the document yields no text
     * @throws when the language is missing or malformed or the document cannot be parsed
     *         (the error is logged and re-thrown)
     */
    async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info('Starting Office document text extraction...');

            const language = options.language || this.getDefaultOCRLanguage();
            if (!this.isValidLanguageFormat(language)) {
                throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
            }

            const { data } = await this.parseOfficeDocument(buffer);
            const combinedText = data.trim();
            // High confidence for direct text extraction
            const confidence = combinedText.length > 0 ? 0.99 : 0;

            const result: OCRResult = {
                text: combinedText,
                confidence,
                extractedAt: new Date().toISOString(),
                language,
                pageCount: 1 // Office documents are treated as single logical document
            };

            // `confidence` is a 0-1 fraction; scale it so the "%" in the message is accurate
            log.info(`Office document text extraction completed. Confidence: ${Math.round(confidence * 100)}%, Text length: ${result.text.length}`);
            return result;
        } catch (error) {
            log.error(`Office document text extraction failed: ${error}`);
            throw error;
        }
    }

    /** Parse the document with officeparser; a falsy parser result becomes an empty string. */
    private async parseOfficeDocument(buffer: Buffer): Promise<{ data: string }> {
        try {
            const data = await officeParser.parseOfficeAsync(buffer, {
                outputErrorToConsole: false,
                newlineDelimiter: '\n',
                ignoreNotes: false,
                putNotesAtLast: false
            });
            return {
                data: data || ''
            };
        } catch (error) {
            throw new Error(`Office document parsing failed: ${error}`);
        }
    }

    getProcessingType(): string {
        return 'office';
    }

    /** Delegates to the owned image processor's cleanup. */
    async cleanup(): Promise<void> {
        await this.imageProcessor.cleanup();
    }

    /**
     * Get default OCR language from options.
     *
     * NOTE(review): the options module is loaded with `require` at call time - confirm `require`
     * is available in the module format this file is built for.
     *
     * @throws when the option cannot be read or is empty
     */
    private getDefaultOCRLanguage(): string {
        try {
            const options = require('../../options.js').default;
            const ocrLanguage = options.getOption('ocrLanguage');
            if (!ocrLanguage) {
                throw new Error('OCR language not configured in user settings');
            }
            return ocrLanguage;
        } catch (error) {
            log.error(`Failed to get default OCR language: ${error}`);
            throw new Error('OCR language must be configured in settings before processing');
        }
    }

    /**
     * Validate OCR language format.
     * Supports a single language (`eng`) or several joined with `+` (`ron+eng`). Each code is
     * 2-3 letters optionally followed by up to two `_suffix` parts of 2-4 letters, which covers
     * codes such as `chi_sim`, `srp_latn`, `deu_frak` and `chi_sim_vert`.
     */
    private isValidLanguageFormat(language: string): boolean {
        if (!language || typeof language !== 'string') {
            return false;
        }
        return language
            .split('+')
            .every(code => OfficeProcessor.LANGUAGE_CODE_PATTERN.test(code.trim()));
    }
}

View File

@@ -0,0 +1,147 @@
import * as pdfParse from 'pdf-parse';
import { FileProcessor } from './file_processor.js';
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
import { ImageProcessor } from './image_processor.js';
import log from '../../log.js';
import sharp from 'sharp';
/**
 * PDF processor for extracting text from PDF files.
 *
 * The text layer embedded in the PDF is read through `pdf-parse`. OCR of scanned pages is not
 * implemented yet: when no text layer is found (or text extraction is disabled) the result is
 * empty text with confidence 0.
 */
export class PDFProcessor extends FileProcessor {
    /** Matches a single Tesseract language code such as `eng`, `chi_sim`, `srp_latn` or `chi_sim_vert`. */
    private static readonly LANGUAGE_CODE_PATTERN = /^[a-zA-Z]{2,3}(?:_[a-zA-Z]{2,4}){0,2}$/;

    private imageProcessor: ImageProcessor;
    private readonly supportedTypes = ['application/pdf'];

    constructor() {
        super();
        this.imageProcessor = new ImageProcessor();
    }

    /** Case-insensitive check against the supported MIME types. */
    canProcess(mimeType: string): boolean {
        return this.supportedTypes.includes(mimeType.toLowerCase());
    }

    /** Returns a copy of the supported MIME type list. */
    getSupportedMimeTypes(): string[] {
        return [...this.supportedTypes];
    }

    /**
     * Extract text from a PDF.
     *
     * @param buffer raw PDF data
     * @param options `language` overrides the configured default; setting
     *                `enablePDFTextExtraction` to `false` skips reading the embedded text layer
     * @returns the embedded text with confidence 0.99 when present, otherwise the (currently
     *          empty) OCR fallback result
     * @throws when the language is missing or malformed or the PDF cannot be parsed
     *         (the error is logged and re-thrown)
     */
    async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info('Starting PDF text extraction...');

            const language = options.language || this.getDefaultOCRLanguage();
            if (!this.isValidLanguageFormat(language)) {
                throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
            }

            // First try to extract existing text from PDF
            if (options.enablePDFTextExtraction !== false) {
                const textResult = await this.extractTextFromPDF(buffer, language);
                if (textResult.text.trim().length > 0) {
                    log.info(`PDF text extraction successful. Length: ${textResult.text.length}`);
                    return textResult;
                }
            }

            // Fall back to OCR if no text found or PDF text extraction is disabled
            log.info('No text found in PDF or text extraction disabled, falling back to OCR...');
            return await this.extractTextViaOCR(buffer, language);
        } catch (error) {
            log.error(`PDF text extraction failed: ${error}`);
            throw error;
        }
    }

    /** Read the text layer embedded in the PDF. */
    private async extractTextFromPDF(buffer: Buffer, language: string): Promise<OCRResult> {
        try {
            const data = await pdfParse(buffer);
            return {
                text: data.text.trim(),
                confidence: 0.99, // High confidence for direct text extraction
                extractedAt: new Date().toISOString(),
                language,
                pageCount: data.numpages
            };
        } catch (error) {
            log.error(`PDF text extraction failed: ${error}`);
            throw error;
        }
    }

    /**
     * OCR fallback for PDFs without a text layer.
     *
     * Rendering PDF pages to images is not implemented, so nothing can be recognised yet.
     * The result carries empty text rather than a human-readable placeholder, so that no
     * made-up wording is reported as text extracted from the document.
     */
    private async extractTextViaOCR(_buffer: Buffer, language: string): Promise<OCRResult> {
        log.info('PDF to image conversion not fully implemented, no OCR text extracted');
        return {
            text: '',
            confidence: 0.0,
            extractedAt: new Date().toISOString(),
            language,
            pageCount: 1
        };
    }

    getProcessingType(): string {
        return 'pdf';
    }

    /** Delegates to the owned image processor's cleanup. */
    async cleanup(): Promise<void> {
        await this.imageProcessor.cleanup();
    }

    /**
     * Get default OCR language from options.
     *
     * NOTE(review): the options module is loaded with `require` at call time - confirm `require`
     * is available in the module format this file is built for.
     *
     * @throws when the option cannot be read or is empty
     */
    private getDefaultOCRLanguage(): string {
        try {
            const options = require('../../options.js').default;
            const ocrLanguage = options.getOption('ocrLanguage');
            if (!ocrLanguage) {
                throw new Error('OCR language not configured in user settings');
            }
            return ocrLanguage;
        } catch (error) {
            log.error(`Failed to get default OCR language: ${error}`);
            throw new Error('OCR language must be configured in settings before processing');
        }
    }

    /**
     * Validate OCR language format.
     * Supports a single language (`eng`) or several joined with `+` (`ron+eng`). Each code is
     * 2-3 letters optionally followed by up to two `_suffix` parts of 2-4 letters, which covers
     * codes such as `chi_sim`, `srp_latn`, `deu_frak` and `chi_sim_vert`.
     */
    private isValidLanguageFormat(language: string): boolean {
        if (!language || typeof language !== 'string') {
            return false;
        }
        return language
            .split('+')
            .every(code => PDFProcessor.LANGUAGE_CODE_PATTERN.test(code.trim()));
    }
}

View File

@@ -0,0 +1,134 @@
import sharp from 'sharp';
import { FileProcessor } from './file_processor.js';
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
import { ImageProcessor } from './image_processor.js';
import log from '../../log.js';
/**
 * TIFF processor for extracting text from (multi-page) TIFF files.
 * Each page is converted to PNG with sharp and handed to an ImageProcessor for OCR.
 */
export class TIFFProcessor extends FileProcessor {
    /** Matches a single Tesseract language code such as `eng`, `chi_sim`, `srp_latn` or `chi_sim_vert`. */
    private static readonly LANGUAGE_CODE_PATTERN = /^[a-zA-Z]{2,3}(?:_[a-zA-Z]{2,4}){0,2}$/;

    private imageProcessor: ImageProcessor;
    private readonly supportedTypes = ['image/tiff', 'image/tif'];

    constructor() {
        super();
        this.imageProcessor = new ImageProcessor();
    }

    /** Case-insensitive check against the supported MIME types. */
    canProcess(mimeType: string): boolean {
        return this.supportedTypes.includes(mimeType.toLowerCase());
    }

    /** Returns a copy of the supported MIME type list. */
    getSupportedMimeTypes(): string[] {
        return [...this.supportedTypes];
    }

    /**
     * OCR every page of a TIFF and combine the results.
     *
     * Pages that fail or yield no text are skipped. From the second contributing page on, each
     * page's text is preceded by a `--- Page N ---` separator. The reported confidence is the
     * average over the pages that contributed text, so blank or failed pages do not drag down
     * the confidence of the text that was recognised.
     *
     * @param buffer raw TIFF data
     * @param options passed through to the image processor; `language` overrides the default
     * @throws when the language is missing or malformed or the TIFF metadata cannot be read
     *         (the error is logged and re-thrown)
     */
    async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
        try {
            log.info('Starting TIFF text extraction...');

            const language = options.language || this.getDefaultOCRLanguage();
            if (!this.isValidLanguageFormat(language)) {
                throw new Error(`Invalid OCR language format: ${language}. Use format like 'eng' or 'ron+eng'`);
            }

            // Check if this is a multi-page TIFF
            const metadata = await sharp(buffer).metadata();
            const pageCount = metadata.pages || 1;

            let combinedText = '';
            let totalConfidence = 0;
            let pagesWithText = 0;

            for (let page = 0; page < pageCount; page++) {
                try {
                    log.info(`Processing TIFF page ${page + 1}/${pageCount}...`);

                    // Extract page as PNG buffer
                    const pageBuffer = await sharp(buffer, { page })
                        .png()
                        .toBuffer();

                    const pageResult = await this.imageProcessor.extractText(pageBuffer, options);
                    if (pageResult.text.trim().length > 0) {
                        if (combinedText.length > 0) {
                            combinedText += '\n\n--- Page ' + (page + 1) + ' ---\n';
                        }
                        combinedText += pageResult.text;
                        totalConfidence += pageResult.confidence;
                        pagesWithText++;
                    }
                } catch (error) {
                    log.error(`Failed to process TIFF page ${page + 1}: ${error}`);
                    // Continue with other pages
                }
            }

            const averageConfidence = pagesWithText > 0 ? totalConfidence / pagesWithText : 0;
            const result: OCRResult = {
                text: combinedText.trim(),
                confidence: averageConfidence,
                extractedAt: new Date().toISOString(),
                language,
                pageCount
            };

            // `confidence` is a 0-1 fraction; scale it so the "%" in the message is accurate
            log.info(`TIFF text extraction completed. Pages: ${pageCount}, Confidence: ${Math.round(averageConfidence * 100)}%, Text length: ${result.text.length}`);
            return result;
        } catch (error) {
            log.error(`TIFF text extraction failed: ${error}`);
            throw error;
        }
    }

    getProcessingType(): string {
        return 'tiff';
    }

    /** Delegates to the owned image processor's cleanup. */
    async cleanup(): Promise<void> {
        await this.imageProcessor.cleanup();
    }

    /**
     * Get default OCR language from options.
     *
     * NOTE(review): the options module is loaded with `require` at call time - confirm `require`
     * is available in the module format this file is built for.
     *
     * @throws when the option cannot be read or is empty
     */
    private getDefaultOCRLanguage(): string {
        try {
            const options = require('../../options.js').default;
            const ocrLanguage = options.getOption('ocrLanguage');
            if (!ocrLanguage) {
                throw new Error('OCR language not configured in user settings');
            }
            return ocrLanguage;
        } catch (error) {
            log.error(`Failed to get default OCR language: ${error}`);
            throw new Error('OCR language must be configured in settings before processing');
        }
    }

    /**
     * Validate OCR language format.
     * Supports a single language (`eng`) or several joined with `+` (`ron+eng`). Each code is
     * 2-3 letters optionally followed by up to two `_suffix` parts of 2-4 letters, which covers
     * codes such as `chi_sim`, `srp_latn`, `deu_frak` and `chi_sim_vert`.
     */
    private isValidLanguageFormat(language: string): boolean {
        if (!language || typeof language !== 'string') {
            return false;
        }
        return language
            .split('+')
            .every(code => TIFFProcessor.LANGUAGE_CODE_PATTERN.test(code.trim()));
    }
}

View File

@@ -211,6 +211,12 @@ const defaultOptions: DefaultOption[] = [
{ name: "aiTemperature", value: "0.7", isSynced: true },
{ name: "aiSystemPrompt", value: "", isSynced: true },
{ name: "aiSelectedProvider", value: "openai", isSynced: true },
// OCR options
{ name: "ocrEnabled", value: "false", isSynced: true },
{ name: "ocrLanguage", value: "eng", isSynced: true },
{ name: "ocrAutoProcessImages", value: "true", isSynced: true },
{ name: "ocrMinConfidence", value: "0.55", isSynced: true },
];
/**

View File

@@ -0,0 +1,111 @@
import Expression from "./expression.js";
import SearchContext from "../search_context.js";
import NoteSet from "../note_set.js";
import sql from "../../sql.js";
import becca from "../../../becca/becca.js";
/**
 * Search expression for finding text within OCR-extracted content.
 *
 * Blobs whose `ocr_text` contains the search text are looked up first. The result is then every
 * note from the input set that either uses such a blob directly or owns an attachment using it.
 */
export default class OCRContentExpression extends Expression {
    private searchText: string;

    constructor(searchText: string) {
        super();
        this.searchText = searchText;
    }

    /**
     * @param inputNoteSet only notes contained in this set can be part of the result
     * @param executionContext not used by this expression
     * @param searchContext receives the highlight tokens when there is at least one OCR match
     * @returns the matching notes; empty when OCR is disabled or the search text is blank
     */
    execute(inputNoteSet: NoteSet, executionContext: object, searchContext: SearchContext): NoteSet {
        const resultNoteSet = new NoteSet();

        // Nothing to do when OCR is off; a blank search would otherwise match every OCR'd blob
        if (!this.isOCRSearchEnabled() || !this.searchText || this.searchText.trim().length === 0) {
            return resultNoteSet;
        }

        const addIfEligible = (noteId: string): void => {
            const note = becca.getNote(noteId);
            if (note && !note.isDeleted && inputNoteSet.hasNoteId(note.noteId)) {
                resultNoteSet.add(note);
            }
        };

        const ocrResults = this.searchOCRContent(this.searchText);
        for (const ocrResult of ocrResults) {
            // Notes that use this blob
            const notes = sql.getRows<{noteId: string}>(`
                SELECT noteId FROM notes
                WHERE blobId = ? AND isDeleted = 0
            `, [ocrResult.blobId]);
            for (const noteRow of notes) {
                addIfEligible(noteRow.noteId);
            }

            // Attachments that use this blob, resolved to their owning notes
            const attachments = sql.getRows<{ownerId: string}>(`
                SELECT ownerId FROM attachments
                WHERE blobId = ? AND isDeleted = 0
            `, [ocrResult.blobId]);
            for (const attachmentRow of attachments) {
                addIfEligible(attachmentRow.ownerId);
            }
        }

        // Add highlight tokens for OCR matches
        if (ocrResults.length > 0) {
            const tokens = this.extractHighlightTokens(this.searchText);
            searchContext.highlightedTokens.push(...tokens);
        }

        return resultNoteSet;
    }

    /** Reads the `ocrEnabled` option; any failure while reading it counts as disabled. */
    private isOCRSearchEnabled(): boolean {
        try {
            const optionService = require('../../options.js').default;
            return optionService.getOptionBool('ocrEnabled');
        } catch {
            return false;
        }
    }

    /**
     * Find up to 50 blobs whose OCR text contains `searchText` as a literal substring.
     * `%`, `_` and `\` in the search text are escaped so they are matched literally instead of
     * acting as LIKE wildcards. Returns an empty list when the query fails.
     */
    private searchOCRContent(searchText: string): Array<{
        blobId: string;
        ocr_text: string;
    }> {
        try {
            const query = `
                SELECT blobId, ocr_text
                FROM blobs
                WHERE ocr_text LIKE ? ESCAPE '\\'
                AND ocr_text IS NOT NULL
                AND ocr_text != ''
                LIMIT 50
            `;
            // Prefix every LIKE metacharacter (and the escape character itself) with a backslash
            const escapedText = searchText.replace(/[\\%_]/g, '\\$&');
            const params = [`%${escapedText}%`];
            return sql.getRows<{
                blobId: string;
                ocr_text: string;
            }>(query, params);
        } catch (error) {
            console.error('Error searching OCR content:', error);
            return [];
        }
    }

    /** Split the search text on whitespace and keep lower-cased words longer than two characters. */
    private extractHighlightTokens(searchText: string): string[] {
        return searchText
            .split(/\s+/)
            .filter(token => token.length > 2)
            .map(token => token.toLowerCase());
    }

    toString(): string {
        return `OCRContent('${this.searchText}')`;
    }
}

View File

@@ -2,6 +2,8 @@
import beccaService from "../../becca/becca_service.js";
import becca from "../../becca/becca.js";
import sql from "../sql.js";
import options from "../options.js";
class SearchResult {
notePathArray: string[];
@@ -48,6 +50,9 @@ class SearchResult {
this.addScoreForStrings(tokens, note.title, 2.0); // Increased to give more weight to title matches
this.addScoreForStrings(tokens, this.notePathTitle, 0.3); // Reduced to further de-emphasize path matches
// Add OCR scoring - weight between title and content matches
this.addOCRScore(tokens, 1.5);
if (note.isInHiddenSubtree()) {
this.score = this.score / 3; // Increased penalty for hidden notes
}
@@ -70,6 +75,37 @@ class SearchResult {
}
this.score += tokenScore;
}
/**
 * Add score for token matches found in OCR text belonging to this note: the text stored on
 * the note's own blob and on the blobs of its non-deleted attachments.
 *
 * Does nothing when the `ocrEnabled` option is off. Errors are reported through
 * `console.debug` and otherwise ignored, so they never escape to the caller.
 *
 * @param tokens search tokens to score
 * @param factor weight passed on to `addScoreForStrings` for every OCR text
 */
addOCRScore(tokens: string[], factor: number) {
    try {
        // Skip the lookup entirely when OCR is switched off
        const ocrEnabled = options.getOptionBool('ocrEnabled');
        if (!ocrEnabled) {
            return;
        }
        // OCR text of the note's blob and of its attachments' blobs
        const ocrRows = sql.getRows(`
SELECT b.ocr_text
FROM blobs b
WHERE b.ocr_text IS NOT NULL
AND b.ocr_text != ''
AND (
b.blobId = (SELECT blobId FROM notes WHERE noteId = ? AND isDeleted = 0)
OR b.blobId IN (
SELECT blobId FROM attachments WHERE ownerId = ? AND isDeleted = 0
)
)
`, [this.noteId, this.noteId]) as Array<{ocr_text: string}>;
        for (const { ocr_text: ocrText } of ocrRows) {
            this.addScoreForStrings(tokens, ocrText, factor);
        }
    } catch (scoringError) {
        // Best-effort: a failing lookup is reported at debug level and otherwise ignored
        console.debug('OCR scoring failed:', scoringError);
    }
}
}
export default SearchResult;

View File

@@ -0,0 +1,337 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
// Mock dependencies: plain stub objects that stand in for the modules SearchResult imports.
// Each one exposes only the members configured or asserted on by the tests in this file.
const mockSql = {
getRows: vi.fn()
};
const mockOptions = {
getOptionBool: vi.fn()
};
const mockBecca = {
// Filled in beforeEach with the note under test, keyed by note id
notes: {},
getNote: vi.fn()
};
const mockBeccaService = {
getNoteTitleForPath: vi.fn()
};
// Register module mocks whose default export is the matching stub object above
vi.mock('../sql.js', () => ({
default: mockSql
}));
vi.mock('../options.js', () => ({
default: mockOptions
}));
// SearchResult uses ES imports for these modules too, so they are mocked the same way
vi.mock('../../becca/becca.js', () => ({
default: mockBecca
}));
vi.mock('../../becca/becca_service.js', () => ({
default: mockBeccaService
}));
// SearchResult is imported dynamically inside beforeEach (after the mocks above are registered)
// and is therefore typed loosely here.
let SearchResult: any;
// Runs before every test: clears recorded mock calls, restores the default stub return values,
// re-registers the note under test and (re-)imports the module under test.
beforeEach(async () => {
vi.clearAllMocks();
// Reset mock implementations: OCR enabled, no OCR rows, fixed path title
mockOptions.getOptionBool.mockReturnValue(true);
mockSql.getRows.mockReturnValue([]);
mockBeccaService.getNoteTitleForPath.mockReturnValue('Test Note Title');
// Setup mock note that is not in the hidden subtree
const mockNote = {
noteId: 'test123',
title: 'Test Note',
isInHiddenSubtree: vi.fn().mockReturnValue(false)
};
mockBecca.notes['test123'] = mockNote;
// Dynamically import SearchResult and keep its default export for the tests
const module = await import('./search_result.js');
SearchResult = module.default;
});
describe('SearchResult', () => {
describe('constructor', () => {
it('should initialize with note path array', () => {
const searchResult = new SearchResult(['root', 'folder', 'test123']);
expect(searchResult.notePathArray).toEqual(['root', 'folder', 'test123']);
expect(searchResult.noteId).toBe('test123');
expect(searchResult.notePath).toBe('root/folder/test123');
expect(searchResult.score).toBe(0);
expect(mockBeccaService.getNoteTitleForPath).toHaveBeenCalledWith(['root', 'folder', 'test123']);
});
});
describe('computeScore', () => {
    // Fresh result pointing at the "test123" fixture note for every test.
    let searchResult: any;

    beforeEach(() => {
        searchResult = new SearchResult(['root', 'test123']);
    });

    // Title / note-id based scoring against the fixture note titled "Test Note".
    describe('basic scoring', () => {
        it('should give highest score for exact note ID match', () => {
            searchResult.computeScore('test123', ['test123']);

            expect(searchResult.score).toBeGreaterThanOrEqual(1000);
        });

        it('should give high score for exact title match', () => {
            searchResult.computeScore('test note', ['test', 'note']);

            expect(searchResult.score).toBeGreaterThan(2000);
        });

        it('should give medium score for title prefix match', () => {
            searchResult.computeScore('test', ['test']);

            expect(searchResult.score).toBeGreaterThan(500);
        });

        it('should give lower score for title word match', () => {
            // Move the token away from the start of the title.
            mockBecca.notes['test123'].title = 'This is a test note';

            searchResult.computeScore('test', ['test']);

            expect(searchResult.score).toBeGreaterThan(300);
        });
    });

    // Scoring contributions coming from rows returned by the mocked SQL layer.
    describe('OCR scoring integration', () => {
        // Builds one mocked OCR row as returned by `mockSql.getRows`.
        const ocrRow = (extracted_text: string, confidence: number) => ({ extracted_text, confidence });

        beforeEach(() => {
            // The boolean option lookup answers `true`, i.e. OCR is enabled.
            mockOptions.getOptionBool.mockReturnValue(true);
        });

        it('should add OCR score when OCR results exist', () => {
            mockSql.getRows.mockReturnValue([ocrRow('sample text from image', 0.95)]);

            searchResult.computeScore('sample', ['sample']);

            // The note id is bound twice in the OCR query.
            expect(mockSql.getRows).toHaveBeenCalledWith(
                expect.stringContaining('FROM ocr_results'),
                ['test123', 'test123']
            );
            expect(searchResult.score).toBeGreaterThan(0);
        });

        it('should apply confidence weighting to OCR scores', () => {
            // Scores the same query against the given OCR rows and reports the result.
            const scoreWithRows = (rows: unknown[]) => {
                mockSql.getRows.mockReturnValue(rows);
                searchResult.computeScore('sample', ['sample']);
                return searchResult.score;
            };

            const highConfidenceScore = scoreWithRows([ocrRow('sample text', 0.95)]);

            // Start from a clean score before the low-confidence run.
            searchResult.score = 0;
            const lowConfidenceScore = scoreWithRows([ocrRow('sample text', 0.30)]);

            expect(highConfidenceScore).toBeGreaterThan(lowConfidenceScore);
        });

        it('should handle multiple OCR results', () => {
            mockSql.getRows.mockReturnValue([
                ocrRow('first sample text', 0.90),
                ocrRow('second sample document', 0.85)
            ]);

            searchResult.computeScore('sample', ['sample']);

            // Only a positive score is asserted here; the per-row contribution is not checked.
            expect(searchResult.score).toBeGreaterThan(0);
        });

        it('should skip OCR scoring when OCR is disabled', () => {
            mockOptions.getOptionBool.mockReturnValue(false);

            searchResult.computeScore('sample', ['sample']);

            expect(mockSql.getRows).not.toHaveBeenCalled();
        });

        it('should handle OCR scoring errors gracefully', () => {
            mockSql.getRows.mockImplementation(() => {
                throw new Error('Database error');
            });

            const compute = () => {
                searchResult.computeScore('sample', ['sample']);
            };

            expect(compute).not.toThrow();
            // The remaining (non-OCR) factors must still leave a valid score behind.
            expect(searchResult.score).toBeGreaterThanOrEqual(0);
        });
    });

    // Notes in the hidden subtree are expected to score a third of a visible note.
    describe('hidden notes penalty', () => {
        it('should apply penalty for hidden notes', () => {
            const note = mockBecca.notes['test123'];

            // First pass: note reported as hidden.
            note.isInHiddenSubtree.mockReturnValue(true);
            searchResult.computeScore('test', ['test']);
            const hiddenScore = searchResult.score;

            // Second pass: same note, now visible, starting from a clean score.
            note.isInHiddenSubtree.mockReturnValue(false);
            searchResult.score = 0;
            searchResult.computeScore('test', ['test']);
            const normalScore = searchResult.score;

            expect(normalScore).toBeGreaterThan(hiddenScore);
            expect(hiddenScore).toBe(normalScore / 3);
        });
    });
});
describe('addScoreForStrings', () => {
    // Fresh result for every test so the score starts from the constructor's value.
    let searchResult: any;

    beforeEach(() => {
        searchResult = new SearchResult(['root', 'test123']);
    });

    // Scores the single token "sample" against `text` and reports the accumulated score.
    const scoreSampleAgainst = (text: string, factor: number) => {
        searchResult.addScoreForStrings(['sample'], text, factor);
        return searchResult.score;
    };

    it('should give highest score for exact token match', () => {
        const exactScore = scoreSampleAgainst('sample text', 1.0);

        searchResult.score = 0;
        const prefixScore = scoreSampleAgainst('sampling text', 1.0);

        searchResult.score = 0;
        const partialScore = scoreSampleAgainst('text sample text', 1.0);

        expect(exactScore).toBeGreaterThan(prefixScore);
        expect(exactScore).toBeGreaterThanOrEqual(partialScore);
    });

    it('should apply factor multiplier correctly', () => {
        const doubleFactorScore = scoreSampleAgainst('sample text', 2.0);

        searchResult.score = 0;
        const singleFactorScore = scoreSampleAgainst('sample text', 1.0);

        // Doubling the factor must exactly double the resulting score.
        expect(doubleFactorScore).toBe(singleFactorScore * 2);
    });

    it('should handle multiple tokens', () => {
        searchResult.addScoreForStrings(['hello', 'world'], 'hello world test', 1.0);

        expect(searchResult.score).toBeGreaterThan(0);
    });

    it('should be case insensitive', () => {
        const lowerCaseScore = scoreSampleAgainst('sample text', 1.0);

        searchResult.score = 0;
        const upperCaseScore = scoreSampleAgainst('SAMPLE text', 1.0);

        expect(upperCaseScore).toEqual(lowerCaseScore);
        expect(upperCaseScore).toBeGreaterThan(0);
    });
});
describe('addOCRScore', () => {
    // Fresh result pointing at the "test123" fixture note for every test.
    let searchResult: any;

    beforeEach(() => {
        searchResult = new SearchResult(['root', 'test123']);
    });

    // Runs OCR scoring for the single token "sample" with the given factor.
    const scoreSampleToken = (factor: number) => {
        searchResult.addOCRScore(['sample'], factor);
    };

    it('should query for both note and attachment OCR results', () => {
        mockOptions.getOptionBool.mockReturnValue(true);
        mockSql.getRows.mockReturnValue([]);

        scoreSampleToken(1.5);

        // The note id is bound twice in the query parameters.
        expect(mockSql.getRows).toHaveBeenCalledWith(
            expect.stringContaining('FROM ocr_results'),
            ['test123', 'test123']
        );
    });

    it('should apply minimum confidence multiplier', () => {
        mockOptions.getOptionBool.mockReturnValue(true);
        // A single row with very low confidence.
        mockSql.getRows.mockReturnValue([
            {
                extracted_text: 'sample text',
                confidence: 0.1
            }
        ]);

        scoreSampleToken(1.0);

        // Expected to score anyway; the original author attributes this to a
        // minimum 0.5x multiplier (implementation not visible from this test).
        expect(searchResult.score).toBeGreaterThan(0);
    });

    it('should handle database query errors', () => {
        mockOptions.getOptionBool.mockReturnValue(true);
        mockSql.getRows.mockImplementation(() => {
            throw new Error('Database connection failed');
        });

        // The failure must be contained inside addOCRScore.
        const attempt = () => {
            scoreSampleToken(1.5);
        };

        expect(attempt).not.toThrow();
    });

    it('should skip when OCR is disabled', () => {
        mockOptions.getOptionBool.mockReturnValue(false);

        scoreSampleToken(1.5);

        expect(mockSql.getRows).not.toHaveBeenCalled();
    });

    it('should handle options service errors', () => {
        mockOptions.getOptionBool.mockImplementation(() => {
            throw new Error('Options service unavailable');
        });

        const attempt = () => {
            scoreSampleToken(1.5);
        };

        expect(attempt).not.toThrow();
        // With the option lookup failing, no OCR query may be issued.
        expect(mockSql.getRows).not.toHaveBeenCalled();
    });
});
});

View File

@@ -20,6 +20,7 @@ import ValueExtractor from "../value_extractor.js";
import { removeDiacritic } from "../../utils.js";
import TrueExp from "../expressions/true.js";
import IsHiddenExp from "../expressions/is_hidden.js";
import OCRContentExpression from "../expressions/ocr_content.js";
import type SearchContext from "../search_context.js";
import type { TokenData, TokenStructure } from "./types.js";
import type Expression from "../expressions/expression.js";
@@ -33,11 +34,20 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext) {
return null;
}
const searchExpressions: Expression[] = [
new NoteFlatTextExp(tokens)
];
if (!searchContext.fastSearch) {
return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp("*=*", { tokens, flatText: true })]);
} else {
return new NoteFlatTextExp(tokens);
searchExpressions.push(new NoteContentFulltextExp("*=*", { tokens, flatText: true }));
// Add OCR content search for each token
for (const token of tokens) {
searchExpressions.push(new OCRContentExpression(token));
}
}
return new OrExp(searchExpressions);
}
// Set of the operator token strings (e.g. "=", "!=", "*=*", ">=", "%=").
// NOTE(review): how the parser consumes this set is not visible in this excerpt.
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%="]);

BIN
eng.traineddata Normal file

Binary file not shown.

View File

@@ -146,6 +146,12 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi
codeOpenAiModel: string;
aiSelectedProvider: string;
// OCR options
ocrEnabled: boolean;
ocrLanguage: string;
ocrAutoProcessImages: boolean;
ocrMinConfidence: string;
}
/** Union of every property name declared on `OptionDefinitions`. */
export type OptionNames = keyof OptionDefinitions;

View File

@@ -70,6 +70,7 @@ export interface BlobRow {
blobId: string;
content: string | Buffer;
contentLength: number;
ocr_text?: string | null;
dateModified: string;
utcDateModified: string;
}

543
pnpm-lock.yaml generated
View File

@@ -581,6 +581,9 @@ importers:
'@types/swagger-ui-express':
specifier: 4.1.8
version: 4.1.8
'@types/tesseract.js':
specifier: 2.0.0
version: 2.0.0(encoding@0.1.13)
'@types/tmp':
specifier: 0.2.6
version: 0.2.6
@@ -725,12 +728,18 @@ importers:
normalize-strings:
specifier: 1.1.1
version: 1.1.1
officeparser:
specifier: 5.2.0
version: 5.2.0
ollama:
specifier: 0.5.16
version: 0.5.16
openai:
specifier: 5.10.2
version: 5.10.2(ws@8.18.3(bufferutil@4.0.9)(utf-8-validate@6.0.5))(zod@3.24.4)
pdf-parse:
specifier: 1.1.1
version: 1.1.1
rand-token:
specifier: 1.0.1
version: 1.0.1
@@ -749,6 +758,9 @@ importers:
serve-favicon:
specifier: 2.5.1
version: 2.5.1
sharp:
specifier: 0.34.3
version: 0.34.3
stream-throttle:
specifier: 0.1.3
version: 0.1.3
@@ -767,6 +779,9 @@ importers:
swagger-ui-express:
specifier: 5.0.1
version: 5.0.1(express@5.1.0)
tesseract.js:
specifier: 6.0.1
version: 6.0.1(encoding@0.1.13)
time2fa:
specifier: ^1.3.0
version: 1.4.2
@@ -3443,6 +3458,128 @@ packages:
'@iconify/utils@2.3.0':
resolution: {integrity: sha512-GmQ78prtwYW6EtzXRU1rY+KwOKfz32PD7iJh6Iyqw68GiKuoZ2A6pRtzWONz5VQJbp50mEjXh/7NkumtrAgRKA==}
'@img/sharp-darwin-arm64@0.34.3':
resolution: {integrity: sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm64]
os: [darwin]
'@img/sharp-darwin-x64@0.34.3':
resolution: {integrity: sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [x64]
os: [darwin]
'@img/sharp-libvips-darwin-arm64@1.2.0':
resolution: {integrity: sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==}
cpu: [arm64]
os: [darwin]
'@img/sharp-libvips-darwin-x64@1.2.0':
resolution: {integrity: sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==}
cpu: [x64]
os: [darwin]
'@img/sharp-libvips-linux-arm64@1.2.0':
resolution: {integrity: sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==}
cpu: [arm64]
os: [linux]
'@img/sharp-libvips-linux-arm@1.2.0':
resolution: {integrity: sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==}
cpu: [arm]
os: [linux]
'@img/sharp-libvips-linux-ppc64@1.2.0':
resolution: {integrity: sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==}
cpu: [ppc64]
os: [linux]
'@img/sharp-libvips-linux-s390x@1.2.0':
resolution: {integrity: sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==}
cpu: [s390x]
os: [linux]
'@img/sharp-libvips-linux-x64@1.2.0':
resolution: {integrity: sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==}
cpu: [x64]
os: [linux]
'@img/sharp-libvips-linuxmusl-arm64@1.2.0':
resolution: {integrity: sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==}
cpu: [arm64]
os: [linux]
'@img/sharp-libvips-linuxmusl-x64@1.2.0':
resolution: {integrity: sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==}
cpu: [x64]
os: [linux]
'@img/sharp-linux-arm64@0.34.3':
resolution: {integrity: sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm64]
os: [linux]
'@img/sharp-linux-arm@0.34.3':
resolution: {integrity: sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm]
os: [linux]
'@img/sharp-linux-ppc64@0.34.3':
resolution: {integrity: sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [ppc64]
os: [linux]
'@img/sharp-linux-s390x@0.34.3':
resolution: {integrity: sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [s390x]
os: [linux]
'@img/sharp-linux-x64@0.34.3':
resolution: {integrity: sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [x64]
os: [linux]
'@img/sharp-linuxmusl-arm64@0.34.3':
resolution: {integrity: sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm64]
os: [linux]
'@img/sharp-linuxmusl-x64@0.34.3':
resolution: {integrity: sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [x64]
os: [linux]
'@img/sharp-wasm32@0.34.3':
resolution: {integrity: sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [wasm32]
'@img/sharp-win32-arm64@0.34.3':
resolution: {integrity: sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm64]
os: [win32]
'@img/sharp-win32-ia32@0.34.3':
resolution: {integrity: sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [ia32]
os: [win32]
'@img/sharp-win32-x64@0.34.3':
resolution: {integrity: sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [x64]
os: [win32]
'@inlang/paraglide-js@2.2.0':
resolution: {integrity: sha512-pkpXu1LanvpcAbvpVPf7PgF11Uq7DliSEBngrcUN36l4ZOOpzn3QBTvVr/tJxvks0O67WseQgiMHet8KH7Oz5A==}
hasBin: true
@@ -3894,6 +4031,70 @@ packages:
resolution: {integrity: sha512-wK+5pLK5XFmgtH3aQ2YVvA3HohS3xqV/OxuVOdNx9Wpnz7VE/fnC+e1A7ln6LFYeck7gOJ/dsZV6OLplOtAJ2w==}
engines: {node: '>=18'}
'@napi-rs/canvas-android-arm64@0.1.73':
resolution: {integrity: sha512-s8dMhfYIHVv7gz8BXg3Nb6cFi950Y0xH5R/sotNZzUVvU9EVqHfkqiGJ4UIqu+15UhqguT6mI3Bv1mhpRkmMQw==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [android]
'@napi-rs/canvas-darwin-arm64@0.1.73':
resolution: {integrity: sha512-bLPCq8Yyq1vMdVdIpQAqmgf6VGUknk8e7NdSZXJJFOA9gxkJ1RGcHOwoXo7h0gzhHxSorg71hIxyxtwXpq10Rw==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [darwin]
'@napi-rs/canvas-darwin-x64@0.1.73':
resolution: {integrity: sha512-GR1CcehDjdNYXN3bj8PIXcXfYLUUOQANjQpM+KNnmpRo7ojsuqPjT7ZVH+6zoG/aqRJWhiSo+ChQMRazZlRU9g==}
engines: {node: '>= 10'}
cpu: [x64]
os: [darwin]
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.73':
resolution: {integrity: sha512-cM7F0kBJVFio0+U2iKSW4fWSfYQ8CPg4/DRZodSum/GcIyfB8+UPJSRM1BvvlcWinKLfX1zUYOwonZX9IFRRcw==}
engines: {node: '>= 10'}
cpu: [arm]
os: [linux]
'@napi-rs/canvas-linux-arm64-gnu@0.1.73':
resolution: {integrity: sha512-PMWNrMON9uz9klz1B8ZY/RXepQSC5dxxHQTowfw93Tb3fLtWO5oNX2k9utw7OM4ypT9BUZUWJnDQ5bfuXc/EUQ==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-arm64-musl@0.1.73':
resolution: {integrity: sha512-lX0z2bNmnk1PGZ+0a9OZwI2lPPvWjRYzPqvEitXX7lspyLFrOzh2kcQiLL7bhyODN23QvfriqwYqp5GreSzVvA==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-riscv64-gnu@0.1.73':
resolution: {integrity: sha512-QDQgMElwxAoADsSR3UYvdTTQk5XOyD9J5kq15Z8XpGwpZOZsSE0zZ/X1JaOtS2x+HEZL6z1S6MF/1uhZFZb5ig==}
engines: {node: '>= 10'}
cpu: [riscv64]
os: [linux]
'@napi-rs/canvas-linux-x64-gnu@0.1.73':
resolution: {integrity: sha512-wbzLJrTalQrpyrU1YRrO6w6pdr5vcebbJa+Aut5QfTaW9eEmMb1WFG6l1V+cCa5LdHmRr8bsvl0nJDU/IYDsmw==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-linux-x64-musl@0.1.73':
resolution: {integrity: sha512-xbfhYrUufoTAKvsEx2ZUN4jvACabIF0h1F5Ik1Rk4e/kQq6c+Dwa5QF0bGrfLhceLpzHT0pCMGMDeQKQrcUIyA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-win32-x64-msvc@0.1.73':
resolution: {integrity: sha512-YQmHXBufFBdWqhx+ympeTPkMfs3RNxaOgWm59vyjpsub7Us07BwCcmu1N5kildhO8Fm0syoI2kHnzGkJBLSvsg==}
engines: {node: '>= 10'}
cpu: [x64]
os: [win32]
'@napi-rs/canvas@0.1.73':
resolution: {integrity: sha512-9iwPZrNlCK4rG+vWyDvyvGeYjck9MoP0NVQP6N60gqJNFA1GsN0imG05pzNsqfCvFxUxgiTYlR8ff0HC1HXJiw==}
engines: {node: '>= 10'}
'@napi-rs/wasm-runtime@0.2.12':
resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==}
@@ -6004,6 +6205,10 @@ packages:
'@types/tabulator-tables@6.2.8':
resolution: {integrity: sha512-AhyqabOXLW3k8685sOWtNAY6hrUZqabysGvEsdIuIXpFViSK/cFziiafztsP/Tveh03qqIKsXu60Mw145o9g4w==}
'@types/tesseract.js@2.0.0':
resolution: {integrity: sha512-t0uNy5L9Ynp/O/fu0+75/ot7lWZZRlwsVwaPQOeYud/V6a0B/JjfYvwnrA4TV6+R9xc1ioRLukqjhI8Spy5diw==}
deprecated: This is a stub types definition. tesseract.js provides its own type definitions, so you do not need this installed.
'@types/through2@2.0.41':
resolution: {integrity: sha512-ryQ0tidWkb1O1JuYvWKyMLYEtOWDqF5mHerJzKz/gQpoAaJq2l/dsMPBF0B5BNVT34rbARYJ5/tsZwLfUi2kwQ==}
@@ -6896,6 +7101,9 @@ packages:
blurhash@2.0.5:
resolution: {integrity: sha512-cRygWd7kGBQO3VEhPiTgq4Wc43ctsM+o46urrmPOiuAe+07fzlSB9OJVdpgDL0jPqXUVQ9ht7aq7kxOeJHRK+w==}
bmp-js@0.1.0:
resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==}
bmp-ts@1.0.9:
resolution: {integrity: sha512-cTEHk2jLrPyi+12M3dhpEbnnPOsaZuq7C45ylbbQIiWgDFZq4UVYPEY5mlqjvsj/6gJv9qX5sa+ebDzLXT28Vw==}
@@ -7300,10 +7508,17 @@ packages:
color-parse@2.0.2:
resolution: {integrity: sha512-eCtOz5w5ttWIUcaKLiktF+DxZO1R9KLNY/xhbV6CkhM7sR3GhVghmt6X6yOnzeaM24po+Z9/S1apbXMwA3Iepw==}
color-string@1.9.1:
resolution: {integrity: sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==}
color-support@1.1.3:
resolution: {integrity: sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==}
hasBin: true
color@4.2.3:
resolution: {integrity: sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==}
engines: {node: '>=12.5.0'}
colord@2.9.3:
resolution: {integrity: sha512-jeC1axXpnb0/2nn/Y1LPuLdgXBLH7aDcHu4KEKfqw3CUhX7ZpfBSlPKyqXE6btIgEzfWtrX3/tyBCaCvXvMkOw==}
@@ -9574,6 +9789,9 @@ packages:
peerDependencies:
postcss: ^8.1.0
idb-keyval@6.2.2:
resolution: {integrity: sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==}
identity-obj-proxy@3.0.0:
resolution: {integrity: sha512-00n6YnVHKrinT9t0d9+5yZC6UBNJANpYEQvL2LlX6Ab9lnmxzIRcEmTPuyGScvl1+jKuCICX1Z0Ab1pPKKdikA==}
engines: {node: '>=4'}
@@ -9736,6 +9954,9 @@ packages:
is-arrayish@0.2.1:
resolution: {integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==}
is-arrayish@0.3.2:
resolution: {integrity: sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==}
is-async-function@2.1.1:
resolution: {integrity: sha512-9dgM/cZBnNvjzaMYHVoxxfPj2QXt22Ev7SuuPrs+xav0ukGB0S6d4ydZdEiM48kLx5kDV+QBPrpVnFyefL8kkQ==}
engines: {node: '>= 0.4'}
@@ -11243,6 +11464,9 @@ packages:
engines: {node: '>=10.5.0'}
deprecated: Use your platform's native DOMException instead
node-ensure@0.0.0:
resolution: {integrity: sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==}
node-environment-flags@1.0.6:
resolution: {integrity: sha512-5Evy2epuL+6TM0lCQGpFIj6KwiEsGh1SrHUhTbNX+sLbBtjidPZFAnVK9y5yU1+h//RitLbRHTIMyxQPtxMdHw==}
@@ -11419,6 +11643,10 @@ packages:
obuf@1.1.2:
resolution: {integrity: sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==}
officeparser@5.2.0:
resolution: {integrity: sha512-EGdHj4RgP5FtyTHsqgDz2ZXkV2q2o2Ktwk4ogHpVcRT1+udwb3pRLfmlNO9ZMDZtDhJz5qNIUAs/+ItrUWoHiQ==}
hasBin: true
oidc-token-hash@5.1.0:
resolution: {integrity: sha512-y0W+X7Ppo7oZX6eovsRkuzcSM40Bicg2JEJkDJ4irIt1wsYAP5MLSNv+QAogO8xivMffw/9OvV3um1pxXgt1uA==}
engines: {node: ^10.13.0 || >=12.0.0}
@@ -11474,6 +11702,10 @@ packages:
openapi-types@12.1.3:
resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==}
opencollective-postinstall@2.0.3:
resolution: {integrity: sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==}
hasBin: true
opener@1.5.2:
resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==}
hasBin: true
@@ -11735,6 +11967,14 @@ packages:
resolution: {integrity: sha512-XDF38WCH3z5OV/OVa8GKUNtLAyneuzbCisx7QUCF8Q6Nutx0WnJrQe5O+kOtBlLfRNUws98Y58Lblp+NJG5T4Q==}
hasBin: true
pdf-parse@1.1.1:
resolution: {integrity: sha512-v6ZJ/efsBpGrGGknjtq9J/oC8tZWq0KWL5vQrk2GlzLEQPUDB1ex+13Rmidl1neNN358Jn9EHZw5y07FFtaC7A==}
engines: {node: '>=6.8.1'}
pdfjs-dist@5.3.93:
resolution: {integrity: sha512-w3fQKVL1oGn8FRyx5JUG5tnbblggDqyx2XzA5brsJ5hSuS+I0NdnJANhmeWKLjotdbPQucLBug5t0MeWr0AAdg==}
engines: {node: '>=20.16.0 || >=22.3.0'}
pe-library@1.0.1:
resolution: {integrity: sha512-nh39Mo1eGWmZS7y+mK/dQIqg7S1lp38DpRxkyoHf0ZcUs/HDc+yyTjuOtTvSMZHmfSLuSQaX945u05Y2Q6UWZg==}
engines: {node: '>=14', npm: '>=7'}
@@ -12972,6 +13212,9 @@ packages:
regenerate@1.4.2:
resolution: {integrity: sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A==}
regenerator-runtime@0.13.11:
resolution: {integrity: sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==}
regenerator-transform@0.15.2:
resolution: {integrity: sha512-hfMp2BoF0qOk3uc5V20ALGDS2ddjQaLrdl7xrGXvAIow7qeWRM2VA2HuCHkUKk9slq3VwEwLNK3DFBqDfPGYtg==}
@@ -13512,6 +13755,10 @@ packages:
setprototypeof@1.2.0:
resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==}
sharp@0.34.3:
resolution: {integrity: sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
shebang-command@1.2.0:
resolution: {integrity: sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg==}
engines: {node: '>=0.10.0'}
@@ -13586,6 +13833,9 @@ packages:
simple-git@3.28.0:
resolution: {integrity: sha512-Rs/vQRwsn1ILH1oBUy8NucJlXmnnLeLCfcvbSehkPzbv3wwoFWIdtfd6Ndo6ZPhlPsCZ60CPI4rxurnwAa+a2w==}
simple-swizzle@0.2.2:
resolution: {integrity: sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==}
simple-xml-to-json@1.2.3:
resolution: {integrity: sha512-kWJDCr9EWtZ+/EYYM5MareWj2cRnZGF93YDNpH4jQiHB+hBIZnfPFSQiVMzZOdk+zXWqTZ/9fTeQNu2DqeiudA==}
engines: {node: '>=20.12.2'}
@@ -14207,6 +14457,12 @@ packages:
engines: {node: '>=10'}
hasBin: true
tesseract.js-core@6.0.0:
resolution: {integrity: sha512-1Qncm/9oKM7xgrQXZXNB+NRh19qiXGhxlrR8EwFbK5SaUbPZnS5OMtP/ghtqfd23hsr1ZvZbZjeuAGcMxd/ooA==}
tesseract.js@6.0.1:
resolution: {integrity: sha512-/sPvMvrCtgxnNRCjbTYbr7BRu0yfWDsMZQ2a/T5aN/L1t8wUQN6tTWv6p6FwzpoEBA0jrN2UD2SX4QQFRdoDbA==}
test-exclude@6.0.0:
resolution: {integrity: sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==}
engines: {node: '>=8'}
@@ -14980,6 +15236,9 @@ packages:
warning@4.0.3:
resolution: {integrity: sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==}
wasm-feature-detect@1.8.0:
resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==}
watchpack@2.4.4:
resolution: {integrity: sha512-c5EGNOiyxxV5qmTtAB7rbiXxi1ooX1pQKMLX/MIabJjRA0SJBQOjKF+KSVfHkr9U1cADPon0mRiVe/riyaiDUA==}
engines: {node: '>=10.13.0'}
@@ -15380,6 +15639,9 @@ packages:
resolution: {integrity: sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==}
engines: {node: '>= 14'}
zlibjs@0.3.1:
resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==}
zod@3.24.4:
resolution: {integrity: sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg==}
@@ -16697,6 +16959,8 @@ snapshots:
'@ckeditor/ckeditor5-core': 46.0.0
'@ckeditor/ckeditor5-upload': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-ai@46.0.0':
dependencies:
@@ -16821,6 +17085,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
'@ckeditor/ckeditor5-widget': 46.0.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-cloud-services@46.0.0':
dependencies:
@@ -17052,6 +17318,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-editor-classic@46.0.0':
dependencies:
@@ -17061,6 +17329,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-editor-decoupled@46.0.0':
dependencies:
@@ -17070,6 +17340,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-editor-inline@46.0.0':
dependencies:
@@ -17103,8 +17375,6 @@ snapshots:
'@ckeditor/ckeditor5-table': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-emoji@46.0.0':
dependencies:
@@ -17161,8 +17431,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-export-word@46.0.0':
dependencies:
@@ -17187,6 +17455,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-font@46.0.0':
dependencies:
@@ -17250,6 +17520,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-html-embed@46.0.0':
dependencies:
@@ -17295,8 +17567,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-import-word@46.0.0':
dependencies:
@@ -17309,8 +17579,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-indent@46.0.0':
dependencies:
@@ -17333,8 +17601,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-line-height@46.0.0':
dependencies:
@@ -17358,8 +17624,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-list-multi-level@46.0.0':
dependencies:
@@ -17383,8 +17647,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-markdown-gfm@46.0.0':
dependencies:
@@ -17422,8 +17684,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-mention@46.0.0(patch_hash=5981fb59ba35829e4dff1d39cf771000f8a8fdfa7a34b51d8af9549541f2d62d)':
dependencies:
@@ -17433,8 +17693,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-merge-fields@46.0.0':
dependencies:
@@ -17447,8 +17705,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-minimap@46.0.0':
dependencies:
@@ -17457,8 +17713,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-operations-compressor@46.0.0':
dependencies:
@@ -17511,8 +17765,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-pagination@46.0.0':
dependencies:
@@ -17619,8 +17871,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-slash-command@46.0.0':
dependencies:
@@ -17633,8 +17883,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-source-editing-enhanced@46.0.0':
dependencies:
@@ -17682,8 +17930,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-table@46.0.0':
dependencies:
@@ -17696,8 +17942,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-template@46.0.0':
dependencies:
@@ -17810,8 +18054,6 @@ snapshots:
'@ckeditor/ckeditor5-engine': 46.0.0
'@ckeditor/ckeditor5-utils': 46.0.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-widget@46.0.0':
dependencies:
@@ -17831,8 +18073,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 46.0.0
ckeditor5: 46.0.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41)
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@codemirror/autocomplete@6.18.6':
dependencies:
@@ -18960,6 +19200,92 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@img/sharp-darwin-arm64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-darwin-arm64': 1.2.0
optional: true
'@img/sharp-darwin-x64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-darwin-x64': 1.2.0
optional: true
'@img/sharp-libvips-darwin-arm64@1.2.0':
optional: true
'@img/sharp-libvips-darwin-x64@1.2.0':
optional: true
'@img/sharp-libvips-linux-arm64@1.2.0':
optional: true
'@img/sharp-libvips-linux-arm@1.2.0':
optional: true
'@img/sharp-libvips-linux-ppc64@1.2.0':
optional: true
'@img/sharp-libvips-linux-s390x@1.2.0':
optional: true
'@img/sharp-libvips-linux-x64@1.2.0':
optional: true
'@img/sharp-libvips-linuxmusl-arm64@1.2.0':
optional: true
'@img/sharp-libvips-linuxmusl-x64@1.2.0':
optional: true
'@img/sharp-linux-arm64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linux-arm64': 1.2.0
optional: true
'@img/sharp-linux-arm@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linux-arm': 1.2.0
optional: true
'@img/sharp-linux-ppc64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linux-ppc64': 1.2.0
optional: true
'@img/sharp-linux-s390x@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linux-s390x': 1.2.0
optional: true
'@img/sharp-linux-x64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linux-x64': 1.2.0
optional: true
'@img/sharp-linuxmusl-arm64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linuxmusl-arm64': 1.2.0
optional: true
'@img/sharp-linuxmusl-x64@0.34.3':
optionalDependencies:
'@img/sharp-libvips-linuxmusl-x64': 1.2.0
optional: true
'@img/sharp-wasm32@0.34.3':
dependencies:
'@emnapi/runtime': 1.4.4
optional: true
'@img/sharp-win32-arm64@0.34.3':
optional: true
'@img/sharp-win32-ia32@0.34.3':
optional: true
'@img/sharp-win32-x64@0.34.3':
optional: true
'@inlang/paraglide-js@2.2.0(babel-plugin-macros@3.1.0)':
dependencies:
'@inlang/recommend-sherlock': 0.2.1
@@ -19678,6 +20004,50 @@ snapshots:
strict-event-emitter: 0.5.1
optional: true
'@napi-rs/canvas-android-arm64@0.1.73':
optional: true
'@napi-rs/canvas-darwin-arm64@0.1.73':
optional: true
'@napi-rs/canvas-darwin-x64@0.1.73':
optional: true
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.73':
optional: true
'@napi-rs/canvas-linux-arm64-gnu@0.1.73':
optional: true
'@napi-rs/canvas-linux-arm64-musl@0.1.73':
optional: true
'@napi-rs/canvas-linux-riscv64-gnu@0.1.73':
optional: true
'@napi-rs/canvas-linux-x64-gnu@0.1.73':
optional: true
'@napi-rs/canvas-linux-x64-musl@0.1.73':
optional: true
'@napi-rs/canvas-win32-x64-msvc@0.1.73':
optional: true
'@napi-rs/canvas@0.1.73':
optionalDependencies:
'@napi-rs/canvas-android-arm64': 0.1.73
'@napi-rs/canvas-darwin-arm64': 0.1.73
'@napi-rs/canvas-darwin-x64': 0.1.73
'@napi-rs/canvas-linux-arm-gnueabihf': 0.1.73
'@napi-rs/canvas-linux-arm64-gnu': 0.1.73
'@napi-rs/canvas-linux-arm64-musl': 0.1.73
'@napi-rs/canvas-linux-riscv64-gnu': 0.1.73
'@napi-rs/canvas-linux-x64-gnu': 0.1.73
'@napi-rs/canvas-linux-x64-musl': 0.1.73
'@napi-rs/canvas-win32-x64-msvc': 0.1.73
optional: true
'@napi-rs/wasm-runtime@0.2.12':
dependencies:
'@emnapi/core': 1.4.5
@@ -22061,6 +22431,12 @@ snapshots:
'@types/tabulator-tables@6.2.8': {}
'@types/tesseract.js@2.0.0(encoding@0.1.13)':
dependencies:
tesseract.js: 6.0.1(encoding@0.1.13)
transitivePeerDependencies:
- encoding
'@types/through2@2.0.41':
dependencies:
'@types/node': 22.17.0
@@ -23158,6 +23534,8 @@ snapshots:
blurhash@2.0.5: {}
bmp-js@0.1.0: {}
bmp-ts@1.0.9: {}
body-parser@1.20.3:
@@ -23774,9 +24152,19 @@ snapshots:
dependencies:
color-name: 2.0.0
color-string@1.9.1:
dependencies:
color-name: 1.1.4
simple-swizzle: 0.2.2
color-support@1.1.3:
optional: true
color@4.2.3:
dependencies:
color-convert: 2.0.1
color-string: 1.9.1
colord@2.9.3: {}
colorette@2.0.20: {}
@@ -26649,6 +27037,8 @@ snapshots:
dependencies:
postcss: 8.5.6
idb-keyval@6.2.2: {}
identity-obj-proxy@3.0.0:
dependencies:
harmony-reflect: 1.6.2
@@ -26775,6 +27165,8 @@ snapshots:
is-arrayish@0.2.1: {}
is-arrayish@0.3.2: {}
is-async-function@2.1.1:
dependencies:
async-function: 1.0.0
@@ -28780,6 +29172,8 @@ snapshots:
node-domexception@1.0.0: {}
node-ensure@0.0.0: {}
node-environment-flags@1.0.6:
dependencies:
object.getownpropertydescriptors: 2.1.8
@@ -29034,6 +29428,15 @@ snapshots:
obuf@1.1.2: {}
officeparser@5.2.0:
dependencies:
'@xmldom/xmldom': 0.8.10
concat-stream: 2.0.0
file-type: 16.5.4
node-ensure: 0.0.0
pdfjs-dist: 5.3.93
yauzl: 3.2.0
oidc-token-hash@5.1.0: {}
ollama@0.5.16:
@@ -29082,6 +29485,8 @@ snapshots:
openapi-types@12.1.3: {}
opencollective-postinstall@2.0.3: {}
opener@1.5.2: {}
openid-client@4.9.1:
@@ -29386,6 +29791,17 @@ snapshots:
ieee754: 1.2.1
resolve-protobuf-schema: 2.1.0
pdf-parse@1.1.1:
dependencies:
debug: 4.4.1(supports-color@6.0.0)
node-ensure: 0.0.0
transitivePeerDependencies:
- supports-color
pdfjs-dist@5.3.93:
optionalDependencies:
'@napi-rs/canvas': 0.1.73
pe-library@1.0.1: {}
peek-readable@4.1.0: {}
@@ -30652,6 +31068,8 @@ snapshots:
regenerate@1.4.2: {}
regenerator-runtime@0.13.11: {}
regenerator-transform@0.15.2:
dependencies:
'@babel/runtime': 7.27.6
@@ -31328,6 +31746,35 @@ snapshots:
setprototypeof@1.2.0: {}
sharp@0.34.3:
dependencies:
color: 4.2.3
detect-libc: 2.0.4
semver: 7.7.2
optionalDependencies:
'@img/sharp-darwin-arm64': 0.34.3
'@img/sharp-darwin-x64': 0.34.3
'@img/sharp-libvips-darwin-arm64': 1.2.0
'@img/sharp-libvips-darwin-x64': 1.2.0
'@img/sharp-libvips-linux-arm': 1.2.0
'@img/sharp-libvips-linux-arm64': 1.2.0
'@img/sharp-libvips-linux-ppc64': 1.2.0
'@img/sharp-libvips-linux-s390x': 1.2.0
'@img/sharp-libvips-linux-x64': 1.2.0
'@img/sharp-libvips-linuxmusl-arm64': 1.2.0
'@img/sharp-libvips-linuxmusl-x64': 1.2.0
'@img/sharp-linux-arm': 0.34.3
'@img/sharp-linux-arm64': 0.34.3
'@img/sharp-linux-ppc64': 0.34.3
'@img/sharp-linux-s390x': 0.34.3
'@img/sharp-linux-x64': 0.34.3
'@img/sharp-linuxmusl-arm64': 0.34.3
'@img/sharp-linuxmusl-x64': 0.34.3
'@img/sharp-wasm32': 0.34.3
'@img/sharp-win32-arm64': 0.34.3
'@img/sharp-win32-ia32': 0.34.3
'@img/sharp-win32-x64': 0.34.3
shebang-command@1.2.0:
dependencies:
shebang-regex: 1.0.0
@@ -31418,6 +31865,10 @@ snapshots:
transitivePeerDependencies:
- supports-color
simple-swizzle@0.2.2:
dependencies:
is-arrayish: 0.3.2
simple-xml-to-json@1.2.3: {}
sirv@3.0.1:
@@ -32264,6 +32715,22 @@ snapshots:
commander: 2.20.3
source-map-support: 0.5.21
tesseract.js-core@6.0.0: {}
tesseract.js@6.0.1(encoding@0.1.13):
dependencies:
bmp-js: 0.1.0
idb-keyval: 6.2.2
is-url: 1.2.4
node-fetch: 2.7.0(encoding@0.1.13)
opencollective-postinstall: 2.0.3
regenerator-runtime: 0.13.11
tesseract.js-core: 6.0.0
wasm-feature-detect: 1.8.0
zlibjs: 0.3.1
transitivePeerDependencies:
- encoding
test-exclude@6.0.0:
dependencies:
'@istanbuljs/schema': 0.1.3
@@ -33219,6 +33686,8 @@ snapshots:
dependencies:
loose-envify: 1.4.0
wasm-feature-detect@1.8.0: {}
watchpack@2.4.4:
dependencies:
glob-to-regexp: 0.4.1
@@ -33716,6 +34185,8 @@ snapshots:
compress-commons: 6.0.2
readable-stream: 4.7.0
zlibjs@0.3.1: {}
zod@3.24.4: {}
zustand@4.5.6(@types/react@19.1.7)(react@16.14.0):

BIN
ron.traineddata Normal file

Binary file not shown.