diff --git a/apps/server/src/assets/db/schema.sql b/apps/server/src/assets/db/schema.sql
index f53dc18c3..9fbea7b53 100644
--- a/apps/server/src/assets/db/schema.sql
+++ b/apps/server/src/assets/db/schema.sql
@@ -219,52 +219,33 @@ CREATE TABLE IF NOT EXISTS sessions (
 );
 
 -- FTS5 Full-Text Search Support
--- Create FTS5 virtual table with porter stemming for word-based searches
+-- Optimized FTS5 virtual table with advanced configuration for millions of notes
 CREATE VIRTUAL TABLE notes_fts USING fts5(
     noteId UNINDEXED,
     title,
     content,
-    tokenize = 'porter unicode61'
+    tokenize = 'porter unicode61',
+    prefix = '2 3 4', -- Index prefixes of 2, 3, and 4 characters for faster prefix searches
+    columnsize = 0, -- No per-row token counts: smaller index, but bm25()/rank gets slower (NOTE(review): confirm ranked queries are OK)
+    detail = full -- Keep full detail for snippet generation
 );
 
--- Create FTS5 virtual table with trigram tokenizer for substring searches
-CREATE VIRTUAL TABLE notes_fts_trigram USING fts5(
-    noteId UNINDEXED,
-    title,
-    content,
-    tokenize = 'trigram',
-    detail = 'none'
-);
+-- Optimized triggers to keep FTS table synchronized with notes
+-- Consolidated from 7 triggers to 5; the blob INSERT trigger is kept so notes created before their blob get indexed
 
--- Triggers to keep FTS table synchronized with notes
--- IMPORTANT: These triggers must handle all SQL operations including:
--- - Regular INSERT/UPDATE/DELETE
--- - INSERT OR REPLACE
--- - INSERT ... ON CONFLICT ... DO UPDATE (upsert)
--- - Cases where notes are created before blobs (import scenarios)
-
--- Trigger for INSERT operations on notes
--- Handles: INSERT, INSERT OR REPLACE, INSERT OR IGNORE, and the INSERT part of upsert
+-- Smart trigger for INSERT operations on notes
+-- Handles: INSERT, INSERT OR REPLACE, INSERT OR IGNORE, and upsert scenarios
 CREATE TRIGGER notes_fts_insert
 AFTER INSERT ON notes
 WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
     AND NEW.isDeleted = 0
     AND NEW.isProtected = 0
 BEGIN
-    -- First delete any existing FTS entries (in case of INSERT OR REPLACE)
-    DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
-
-    -- Then insert the new entry into both FTS tables
-    INSERT INTO notes_fts (noteId, title, content)
-    SELECT
-        NEW.noteId,
-        NEW.title,
-        COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
-    FROM (SELECT NEW.noteId) AS note_select
-    LEFT JOIN blobs b ON b.blobId = NEW.blobId;
-
-    INSERT INTO notes_fts_trigram (noteId, title, content)
+    -- notes_fts has no unique key on noteId (only its implicit rowid), so OR REPLACE
+    -- cannot overwrite an earlier row for this note: delete it explicitly first
+    DELETE FROM notes_fts WHERE noteId = NEW.noteId;
+
+    INSERT INTO notes_fts (noteId, title, content)
     SELECT
         NEW.noteId,
         NEW.title,
@@ -273,47 +254,39 @@ BEGIN
     LEFT JOIN blobs b ON b.blobId = NEW.blobId;
 END;
 
--- Trigger for UPDATE operations on notes table
--- Handles: Regular UPDATE and the UPDATE part of upsert (ON CONFLICT DO UPDATE)
--- Fires for ANY update to searchable notes to ensure FTS stays in sync
+-- Smart trigger for UPDATE operations on notes table
+-- Only fires when relevant fields actually change (IS NOT keeps the comparison NULL-safe)
 CREATE TRIGGER notes_fts_update
 AFTER UPDATE ON notes
-WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-    -- Fire on any change, not just specific columns, to handle all upsert scenarios
+WHEN OLD.title IS NOT NEW.title OR OLD.type IS NOT NEW.type OR OLD.blobId IS NOT NEW.blobId OR
+     OLD.isDeleted IS NOT NEW.isDeleted OR OLD.isProtected IS NOT NEW.isProtected
 BEGIN
-    -- Always delete the old entries from both FTS tables
+    -- Remove old entry (also when the note is no longer a searchable type)
     DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
 
-    -- Insert new entries into both FTS tables if note is not deleted and not protected
-    INSERT INTO notes_fts (noteId, title, content)
-    SELECT
-        NEW.noteId,
-        NEW.title,
-        COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
-    FROM (SELECT NEW.noteId) AS note_select
-    LEFT JOIN blobs b ON b.blobId = NEW.blobId
-    WHERE NEW.isDeleted = 0
-        AND NEW.isProtected = 0;
-
-    INSERT INTO notes_fts_trigram (noteId, title, content)
+    -- Add new entry if eligible
+    INSERT OR REPLACE INTO notes_fts (noteId, title, content)
     SELECT
         NEW.noteId,
         NEW.title,
         COALESCE(b.content, '')
     FROM (SELECT NEW.noteId) AS note_select
     LEFT JOIN blobs b ON b.blobId = NEW.blobId
-    WHERE NEW.isDeleted = 0
-        AND NEW.isProtected = 0;
+    WHERE NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
+        AND NEW.isDeleted = 0 AND NEW.isProtected = 0;
 END;
 
--- Trigger for UPDATE operations on blobs
--- Handles: Regular UPDATE and the UPDATE part of upsert (ON CONFLICT DO UPDATE)
--- IMPORTANT: Uses INSERT OR REPLACE for efficiency with deduplicated blobs
+-- Smart trigger for UPDATE operations on blobs
+-- Only fires when content actually changes
 CREATE TRIGGER notes_fts_blob_update
 AFTER UPDATE ON blobs
+WHEN OLD.content IS NOT NEW.content
 BEGIN
-    -- Update both FTS tables for all notes sharing this blob
+    -- Update FTS table for all notes sharing this blob. notes_fts has no unique key on
+    -- noteId, so the stale rows are deleted explicitly before the fresh ones are added
+    DELETE FROM notes_fts
+    WHERE noteId IN (SELECT noteId FROM notes WHERE blobId = NEW.blobId);
+
     INSERT OR REPLACE INTO notes_fts (noteId, title, content)
     SELECT
         n.noteId,
@@ -324,100 +297,34 @@ BEGIN
         AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
         AND n.isDeleted = 0
         AND n.isProtected = 0;
-
-    INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content)
-    SELECT
-        n.noteId,
-        n.title,
-        NEW.content
-    FROM notes n
-    WHERE n.blobId = NEW.blobId
-        AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-        AND n.isDeleted = 0
-        AND n.isProtected = 0;
 END;
 
--- Trigger for DELETE operations
+-- Trigger for DELETE operations (handles both hard delete and cleanup)
 CREATE TRIGGER notes_fts_delete
 AFTER DELETE ON notes
 BEGIN
     DELETE FROM notes_fts WHERE noteId = OLD.noteId;
-    DELETE FROM notes_fts_trigram WHERE noteId = OLD.noteId;
 END;
 
--- Trigger for soft delete (isDeleted = 1)
-CREATE TRIGGER notes_fts_soft_delete
-AFTER UPDATE ON notes
-WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
-BEGIN
-    DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
-END;
-
--- Trigger for notes becoming protected
--- Remove from FTS when a note becomes protected
-CREATE TRIGGER notes_fts_protect
-AFTER UPDATE ON notes
-WHEN OLD.isProtected = 0 AND NEW.isProtected = 1
-BEGIN
-    DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
-END;
-
--- Trigger for notes becoming unprotected
--- Add to FTS when a note becomes unprotected (if eligible)
-CREATE TRIGGER notes_fts_unprotect
-AFTER UPDATE ON notes
-WHEN OLD.isProtected = 1 AND NEW.isProtected = 0
-    AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-    AND NEW.isDeleted = 0
-BEGIN
-    DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
-
-    INSERT INTO notes_fts (noteId, title, content)
-    SELECT
-        NEW.noteId,
-        NEW.title,
-        COALESCE(b.content, '')
-    FROM (SELECT NEW.noteId) AS note_select
-    LEFT JOIN blobs b ON b.blobId = NEW.blobId;
-
-    INSERT INTO notes_fts_trigram (noteId, title, content)
-    SELECT
-        NEW.noteId,
-        NEW.title,
-        COALESCE(b.content, '')
-    FROM (SELECT NEW.noteId) AS note_select
-    LEFT JOIN blobs b ON b.blobId = NEW.blobId;
-END;
-
 -- Trigger for INSERT operations on blobs
 -- Handles: INSERT, INSERT OR REPLACE, and the INSERT part of upsert
 -- Updates all notes that reference this blob (common during import and deduplication)
 CREATE TRIGGER notes_fts_blob_insert
 AFTER INSERT ON blobs
 BEGIN
-    -- Update both FTS tables for all notes that reference this blob
+    -- Notes may be indexed before their blob exists (with empty content), and
+    -- notes_fts has no unique key on noteId, so drop those rows before re-inserting
+    DELETE FROM notes_fts
+    WHERE noteId IN (SELECT noteId FROM notes WHERE blobId = NEW.blobId);
+
     INSERT OR REPLACE INTO notes_fts (noteId, title, content)
     SELECT
         n.noteId,
         n.title,
         NEW.content
     FROM notes n
     WHERE n.blobId = NEW.blobId
         AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
         AND n.isDeleted = 0
         AND n.isProtected = 0;
-
-    INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content)
-    SELECT
-        n.noteId,
-        n.title,
-        NEW.content
-    FROM notes n
-    WHERE n.blobId = NEW.blobId
-        AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-        AND n.isDeleted = 0
-        AND n.isProtected = 0;
 END;
diff --git a/apps/server/src/migrations/0234__add_fts5_search.ts b/apps/server/src/migrations/0234__add_fts5_search.ts
index 40e2cdadb..cf0116313 100644
--- a/apps/server/src/migrations/0234__add_fts5_search.ts
+++ b/apps/server/src/migrations/0234__add_fts5_search.ts
@@ -17,7 +17,18 @@ export default function addFTS5SearchAndPerformanceIndexes() {
     // Create FTS5 virtual table with porter tokenizer
     log.info("Creating FTS5 virtual table...");
 
+    // Set optimal SQLite pragmas for FTS5 operations with millions of notes
     sql.executeScript(`
+        -- Memory and performance pragmas for large-scale FTS operations
+        PRAGMA cache_size = -262144; -- 256MB cache for better performance
+        PRAGMA temp_store = MEMORY; -- Use RAM for temporary storage
+        PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O
+        PRAGMA synchronous = NORMAL; -- Faster writes with good safety
+        PRAGMA journal_mode = WAL; -- Write-ahead logging for better concurrency
+        PRAGMA wal_autocheckpoint = 1000; -- Auto-checkpoint every 1000 pages
+        PRAGMA automatic_index = ON; -- Allow automatic indexes
+        PRAGMA threads = 4; -- Use multiple threads for sorting
+
         -- Drop existing FTS tables if they exist
         DROP TABLE IF EXISTS notes_fts;
         DROP TABLE IF EXISTS notes_fts_trigram;
@@ -25,42 +36,50 @@ export default function addFTS5SearchAndPerformanceIndexes() {
         DROP TABLE IF EXISTS notes_fts_stats;
         DROP TABLE IF EXISTS notes_fts_aux;
 
-        -- Create FTS5 virtual table with porter tokenizer for stemming
+        -- Create optimized FTS5 virtual table for millions of notes
         CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
             noteId UNINDEXED,
             title,
             content,
             tokenize = 'porter unicode61',
-            prefix = '2 3' -- Index prefixes of 2 and 3 characters for faster prefix searches
+            prefix = '2 3 4', -- Index prefixes of 2, 3, and 4 characters for faster prefix searches
+            columnsize = 0, -- No per-row token counts: smaller index, but bm25()/rank gets slower (NOTE(review): confirm ranked queries are OK)
+            detail = full -- Keep full detail for snippet generation
         );
     `);
 
     log.info("Populating FTS5 table with existing note content...");
 
-    // Populate the FTS table with existing notes
-    const batchSize = 1000;
+    // Optimized population with batch inserts and better memory management
+    const batchSize = 5000; // Larger batch size for better performance
     let processedCount = 0;
 
     try {
+        // Count eligible notes first
+        const totalNotes = sql.getValue(`
+            SELECT COUNT(*)
+            FROM notes n
+            LEFT JOIN blobs b ON n.blobId = b.blobId
+            WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
+                AND n.isDeleted = 0
+                AND n.isProtected = 0
+                AND b.content IS NOT NULL
+        `) || 0;
+
+        log.info(`Found ${totalNotes} notes to index`);
+
+        // Process in optimized batches using a prepared statement
         sql.transactional(() => {
-            // Count eligible notes
-            const totalNotes = sql.getValue(`
-                SELECT COUNT(*)
-                FROM notes n
-                LEFT JOIN blobs b ON n.blobId = b.blobId
-                WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                    AND n.isDeleted = 0
-                    AND n.isProtected = 0
-                    AND b.content IS NOT NULL
-            `) || 0;
-
-            log.info(`Found ${totalNotes} notes to index`);
-
-            // Insert notes in batches
+            // Prepare statement for batch inserts
+            const insertStmt = sql.prepare(`
+                INSERT OR REPLACE INTO notes_fts (noteId, title, content)
+                VALUES (?, ?, ?)
+            `);
+
             let offset = 0;
             while (offset < totalNotes) {
-                sql.execute(`
-                    INSERT INTO notes_fts (noteId, title, content)
+                // Fetch batch of notes
+                const notesBatch = sql.getRows<{noteId: string, title: string, content: string}>(`
                     SELECT
                         n.noteId,
                         n.title,
@@ -74,14 +93,32 @@ export default function addFTS5SearchAndPerformanceIndexes() {
                     ORDER BY n.noteId
                     LIMIT ? OFFSET ?
                 `, [batchSize, offset]);
+
+                if (!notesBatch || notesBatch.length === 0) {
+                    break;
+                }
+
+                // Batch insert using prepared statement
+                for (const note of notesBatch) {
+                    insertStmt.run(note.noteId, note.title, note.content);
+                }
 
-                offset += batchSize;
-                processedCount = Math.min(offset, totalNotes);
+                offset += notesBatch.length;
+                processedCount += notesBatch.length;
 
-                if (processedCount % 10000 === 0) {
-                    log.info(`Indexed ${processedCount} of ${totalNotes} notes...`);
+                // Progress reporting every 10k notes
+                if (processedCount % 10000 === 0 || processedCount === totalNotes) {
+                    log.info(`Indexed ${processedCount} of ${totalNotes} notes (${Math.round((processedCount / totalNotes) * 100)}%)...`);
+                }
+
+                // Early exit if we processed fewer notes than batch size
+                if (notesBatch.length < batchSize) {
+                    break;
                 }
             }
+
+            // Finalize prepared statement
+            insertStmt.finalize();
         });
     } catch (error) {
         log.error(`Failed to populate FTS index: ${error}`);
@@ -106,7 +143,7 @@ export default function addFTS5SearchAndPerformanceIndexes() {
         sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`);
     }
 
-    // Create triggers for notes table operations
+    // Create optimized triggers for notes table operations
     sql.execute(`
         CREATE TRIGGER notes_fts_insert
         AFTER INSERT ON notes
@@ -114,7 +151,10 @@ export default function addFTS5SearchAndPerformanceIndexes() {
             AND NEW.isDeleted = 0
             AND NEW.isProtected = 0
         BEGIN
+            -- notes_fts has no unique key on noteId, so INSERT OR REPLACE cannot overwrite
+            -- an earlier row for this note: delete it explicitly before inserting
+            DELETE FROM notes_fts WHERE noteId = NEW.noteId;
             INSERT INTO notes_fts (noteId, title, content)
             SELECT
                 NEW.noteId,
                 NEW.title,
@@ -127,12 +167,20 @@ export default function addFTS5SearchAndPerformanceIndexes() {
     sql.execute(`
         CREATE TRIGGER notes_fts_update
         AFTER UPDATE ON notes
+        WHEN (
+            -- Only fire when relevant fields change or status changes (IS NOT is NULL-safe)
+            OLD.title IS NOT NEW.title OR
+            OLD.type IS NOT NEW.type OR
+            OLD.blobId IS NOT NEW.blobId OR
+            OLD.isDeleted IS NOT NEW.isDeleted OR
+            OLD.isProtected IS NOT NEW.isProtected
+        )
         BEGIN
-            -- Delete old entry
+            -- Always remove old entry first
             DELETE FROM notes_fts WHERE noteId = OLD.noteId;
 
-            -- Insert new entry if eligible
-            INSERT INTO notes_fts (noteId, title, content)
+            -- Insert new entry if eligible (avoid redundant work)
+            INSERT OR REPLACE INTO notes_fts (noteId, title, content)
             SELECT
                 NEW.noteId,
                 NEW.title,
@@ -153,19 +201,20 @@ export default function addFTS5SearchAndPerformanceIndexes() {
         END;
     `);
 
-    // Create triggers for blob updates
+    // Create optimized triggers for blob updates
     sql.execute(`
         CREATE TRIGGER blobs_fts_update
         AFTER UPDATE ON blobs
+        WHEN OLD.content IS NOT NEW.content -- Only fire when content actually changes (NULL-safe)
         BEGIN
             -- Update all notes that reference this blob
             DELETE FROM notes_fts
             WHERE noteId IN (
                 SELECT noteId FROM notes
                 WHERE blobId = NEW.blobId
             );
 
             INSERT INTO notes_fts (noteId, title, content)
             SELECT
                 n.noteId,
                 n.title,
@@ -182,7 +231,12 @@ export default function addFTS5SearchAndPerformanceIndexes() {
         CREATE TRIGGER blobs_fts_insert
         AFTER INSERT ON blobs
         BEGIN
+            -- Remove rows indexed before the blob existed (notes_fts has no unique
+            -- key on noteId, so INSERT OR REPLACE would not overwrite them)
+            DELETE FROM notes_fts
+            WHERE noteId IN (SELECT noteId FROM notes WHERE blobId = NEW.blobId);
+
             INSERT INTO notes_fts (noteId, title, content)
             SELECT
                 n.noteId,
                 n.title,
@@ -201,16 +255,31 @@ export default function addFTS5SearchAndPerformanceIndexes() {
     log.info("Optimizing FTS5 index...");
     sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`);
 
-    // Set essential SQLite pragmas for better performance
+    // Set comprehensive SQLite pragmas optimized for millions of notes
+    log.info("Configuring SQLite pragmas for large-scale FTS performance...");
+
     sql.executeScript(`
-        -- Increase cache size (50MB)
-        PRAGMA cache_size = -50000;
+        -- Memory Management (Critical for large databases)
+        PRAGMA cache_size = -262144; -- 256MB cache (was 50MB) - critical for FTS performance
+        PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices
+        PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance
 
-        -- Use memory for temp storage
-        PRAGMA temp_store = 2;
+        -- Write Optimization (Important for batch operations)
+        PRAGMA synchronous = NORMAL; -- Balance between safety and performance (was FULL)
+        PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency
+        PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management
 
-        -- Run ANALYZE on FTS tables
+        -- Query Optimization (Essential for FTS queries)
+        PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes
+        PRAGMA optimize; -- Update query planner statistics
+
+        -- FTS-Specific Optimizations
+        PRAGMA threads = 4; -- Use multiple threads for FTS operations (if available)
+
+        -- Run comprehensive ANALYZE on all FTS-related tables
         ANALYZE notes_fts;
+        ANALYZE notes;
+        ANALYZE blobs;
     `);
 
     log.info("FTS5 migration completed successfully");
diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts
index c836d9ac3..6677d6052 100644
--- a/apps/server/src/services/search/expressions/note_content_fulltext.ts
+++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts
@@ -81,18 +81,7 @@ class NoteContentFulltextExp extends Expression {
         // Try to use FTS5 if available for better performance
         if (ftsSearchService.checkFTS5Availability() && this.canUseFTS5()) {
             try {
-                // Performance comparison logging for FTS5 vs traditional search
-                const searchQuery = this.tokens.join(" ");
-                const isQuickSearch = searchContext.fastSearch === false; // quick-search sets fastSearch to false
-                if (isQuickSearch) {
-                    log.info(`[QUICK-SEARCH-COMPARISON] Starting comparison for query: "${searchQuery}" with operator: ${this.operator}`);
-                }
-
-                // Check if we need to search protected notes
-                const searchProtected = protectedSessionService.isProtectedSessionAvailable();
-
-                // Time FTS5 search
-                const ftsStartTime = Date.now();
+                // Use FTS5 for optimized search
                 const noteIdSet = inputNoteSet.getNoteIds();
                 const ftsResults = ftsSearchService.searchSync(
                     this.tokens,
@@ -103,8 +92,6 @@ class NoteContentFulltextExp extends Expression {
                         searchProtected: false // FTS5 doesn't index protected notes
                     }
                 );
-                const ftsEndTime = Date.now();
-                const ftsTime = ftsEndTime - ftsStartTime;
 
                 // Add FTS results to note set
                 for (const result of ftsResults) {
@@ -113,53 +100,8 @@ class NoteContentFulltextExp extends Expression {
                     }
                 }
 
-                // For quick-search, also run traditional search for comparison
-                if (isQuickSearch) {
-                    const traditionalStartTime = Date.now();
-
-                    // Log the input set size for debugging
-                    log.info(`[QUICK-SEARCH-COMPARISON] Input set size: ${inputNoteSet.notes.length} notes`);
-
-                    // Run traditional search for comparison
-                    // Use the dedicated comparison method that always runs the full search
-                    const traditionalResults = this.executeTraditionalSearch(inputNoteSet, searchContext);
-
-                    const traditionalEndTime = Date.now();
-                    const traditionalTime = traditionalEndTime - traditionalStartTime;
-
-                    // Log performance comparison
-                    const speedup = traditionalTime > 0 ? (traditionalTime / ftsTime).toFixed(2) : "N/A";
-                    log.info(`[QUICK-SEARCH-COMPARISON] ===== Results for query: "${searchQuery}" =====`);
-                    log.info(`[QUICK-SEARCH-COMPARISON] FTS5 search: ${ftsTime}ms, found ${ftsResults.length} results`);
-                    log.info(`[QUICK-SEARCH-COMPARISON] Traditional search: ${traditionalTime}ms, found ${traditionalResults.notes.length} results`);
-                    log.info(`[QUICK-SEARCH-COMPARISON] FTS5 is ${speedup}x faster (saved ${traditionalTime - ftsTime}ms)`);
-
-                    // Check if results match
-                    const ftsNoteIds = new Set(ftsResults.map(r => r.noteId));
-                    const traditionalNoteIds = new Set(traditionalResults.notes.map(n => n.noteId));
-                    const matchingResults = ftsNoteIds.size === traditionalNoteIds.size &&
-                        Array.from(ftsNoteIds).every(id => traditionalNoteIds.has(id));
-
-                    if (!matchingResults) {
-                        log.info(`[QUICK-SEARCH-COMPARISON] Results differ! FTS5: ${ftsNoteIds.size} notes, Traditional: ${traditionalNoteIds.size} notes`);
-
-                        // Find differences
-                        const onlyInFTS = Array.from(ftsNoteIds).filter(id => !traditionalNoteIds.has(id));
-                        const onlyInTraditional = Array.from(traditionalNoteIds).filter(id => !ftsNoteIds.has(id));
-
-                        if (onlyInFTS.length > 0) {
-                            log.info(`[QUICK-SEARCH-COMPARISON] Only in FTS5: ${onlyInFTS.slice(0, 5).join(", ")}${onlyInFTS.length > 5 ? "..." : ""}`);
-                        }
-                        if (onlyInTraditional.length > 0) {
-                            log.info(`[QUICK-SEARCH-COMPARISON] Only in Traditional: ${onlyInTraditional.slice(0, 5).join(", ")}${onlyInTraditional.length > 5 ? "..." : ""}`);
-                        }
-                    } else {
-                        log.info(`[QUICK-SEARCH-COMPARISON] Results match perfectly! ✓`);
-                    }
-                    log.info(`[QUICK-SEARCH-COMPARISON] ========================================`);
-                }
-
                 // If we need to search protected notes, use the separate method
+                const searchProtected = protectedSessionService.isProtectedSessionAvailable();
                 if (searchProtected) {
                     const protectedResults = ftsSearchService.searchProtectedNotesSync(
                         this.tokens,
@@ -258,24 +200,6 @@ class NoteContentFulltextExp extends Expression {
         return resultNoteSet;
     }
 
-    /**
-     * Executes traditional search for comparison purposes
-     * This always runs the full traditional search regardless of operator
-     */
-    private executeTraditionalSearch(inputNoteSet: NoteSet, searchContext: SearchContext): NoteSet {
-        const resultNoteSet = new NoteSet();
-
-        for (const row of sql.iterateRows(`
-                SELECT noteId, type, mime, content, isProtected
-                FROM notes JOIN blobs USING (blobId)
-                WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                  AND isDeleted = 0
-                  AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
-            this.findInText(row, inputNoteSet, resultNoteSet);
-        }
-
-        return resultNoteSet;
-    }
 
     findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) {
         if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
diff --git a/apps/server/src/services/search/fts_search.test.ts b/apps/server/src/services/search/fts_search.test.ts
index c88bdd1cd..d29e3c185 100644
--- a/apps/server/src/services/search/fts_search.test.ts
+++ b/apps/server/src/services/search/fts_search.test.ts
@@ -34,6 +34,7 @@ describe('FTS5 Search Service', () => {
             getRows: vi.fn(),
             getColumn: vi.fn(),
             execute: vi.fn(),
+            prepare: vi.fn(),
             iterateRows: vi.fn(),
             transactional: vi.fn((fn: Function) => fn())
         };
@@ -253,10 +254,19 @@ describe('FTS5 Search Service', () => {
             ];
             mockSql.getRows.mockReturnValue(missingNotes);
 
+            // Mock prepared statement
+            const mockPreparedStatement = {
+                run: vi.fn(),
+                finalize: vi.fn()
+            };
+            mockSql.prepare.mockReturnValue(mockPreparedStatement);
+
             const count = ftsSearchService.syncMissingNotes();
 
             expect(count).toBe(2);
-            expect(mockSql.execute).toHaveBeenCalledTimes(2);
+            expect(mockSql.prepare).toHaveBeenCalledTimes(1);
+            expect(mockPreparedStatement.run).toHaveBeenCalledTimes(2);
+            expect(mockPreparedStatement.finalize).toHaveBeenCalledTimes(1);
         });
 
         it('should optimize index', () => {
diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts
index d5b155804..e31fc6e93 100644
--- a/apps/server/src/services/search/fts_search.ts
+++ b/apps/server/src/services/search/fts_search.ts
@@ -70,15 +70,30 @@ const FTS_CONFIG = {
  */
 class FTSSearchService {
     private isFTS5Available: boolean | null = null;
+    private checkingAvailability = false;
 
     /**
      * Check if FTS5 is available and properly configured
+     * The result is cached after the first check; a re-entrant call during the check returns false
      */
     checkFTS5Availability(): boolean {
+        // Return cached result if available
         if (this.isFTS5Available !== null) {
             return this.isFTS5Available;
         }
 
+        // Re-entrancy guard. This method is synchronous and JavaScript runs it on a
+        // single thread, so nothing else can clear the flag while we wait here: a
+        // spin loop would never terminate and would freeze the whole process.
+        // If the check is re-entered while still in progress, the cache is known to
+        // be empty at this point (see the early return above), so report FTS5 as
+        // unavailable for this call only and leave the cached value untouched.
+        if (this.checkingAvailability) {
+            return false;
+        }
+
+        this.checkingAvailability = true;
+
         try {
             // Check if FTS5 extension is available
             const result = sql.getRow(`
@@ -101,6 +116,8 @@ class FTSSearchService {
 
             if (!this.isFTS5Available) {
                 log.info("FTS5 table not found, full-text search not available");
+            } else {
+                log.info("FTS5 full-text search is available and configured");
             }
 
             return this.isFTS5Available;
@@ -108,6 +125,8 @@ class FTSSearchService {
             log.error(`Error checking FTS5 availability: ${error}`);
             this.isFTS5Available = false;
             return false;
+        } finally {
+            this.checkingAvailability = false;
         }
     }
 
@@ -268,14 +287,19 @@ class FTSSearchService {
                 return 0;
             }
 
-            // Insert missing notes in batches
+            // Insert missing notes using efficient batch processing
             sql.transactional(() => {
+                // Use prepared statement for better performance
+                const insertStmt = sql.prepare(`
+                    INSERT OR REPLACE INTO notes_fts (noteId, title, content)
+                    VALUES (?, ?, ?)
+                `);
+
                 for (const note of missingNotes) {
-                    sql.execute(`
-                        INSERT INTO notes_fts (noteId, title, content)
-                        VALUES (?, ?, ?)
-                    `, [note.noteId, note.title, note.content]);
+                    insertStmt.run(note.noteId, note.title, note.content);
                 }
+
+                insertStmt.finalize();
             });
 
             log.info(`Synced ${missingNotes.length} missing notes to FTS index`);
diff --git a/apps/server/src/services/sql_init.ts b/apps/server/src/services/sql_init.ts
index 9fc9ba2e5..f3f9d902a 100644
--- a/apps/server/src/services/sql_init.ts
+++ b/apps/server/src/services/sql_init.ts
@@ -44,6 +44,9 @@ async function initDbConnection() {
 
     await migrationService.migrateIfNecessary();
 
+    // Initialize optimized SQLite pragmas for FTS and large database performance
+    initializeFTSPragmas();
+
     sql.execute('CREATE TEMP TABLE "param_list" (`paramId` TEXT NOT NULL PRIMARY KEY)');
 
     sql.execute(`
@@ -185,6 +188,42 @@ function setDbAsInitialized() {
     }
 }
 
+/**
+ * Initialize SQLite pragmas optimized for FTS5 and large databases
+ */
+function initializeFTSPragmas() {
+    if (config.General.readOnly) {
+        return;
+    }
+
+    try {
+        log.info("Setting SQLite pragmas for FTS5 and large database optimization...");
+
+        sql.executeScript(`
+            -- Memory Management (Critical for FTS performance with millions of notes)
+            PRAGMA cache_size = -262144; -- 256MB cache for better query performance
+            PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices
+            PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance
+
+            -- Write Optimization (Better for concurrent operations)
+            PRAGMA synchronous = NORMAL; -- Balance safety and performance (FULL is too slow for large operations)
+            PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency
+            PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management
+
+            -- Query Optimization (Essential for complex FTS queries)
+            PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes when beneficial
+
+            -- FTS-Specific Optimizations
+            PRAGMA threads = 4; -- Use multiple threads for FTS operations if available
+        `);
+
+        log.info("FTS pragmas initialized successfully");
+    } catch (error) {
+        log.error(`Failed to initialize FTS pragmas: ${error}`);
+        // Don't throw - continue with default settings
+    }
+}
+
 function optimize() {
     if (config.General.readOnly) {
         return;