feat(search): optimize FTS5 indexing and SQLite settings for very large databases (may need to be squashed later)

This commit is contained in:
perf3ct
2025-09-01 04:33:10 +00:00
parent 5b79e0d71e
commit 37d0136c50
6 changed files with 207 additions and 277 deletions

View File

@@ -219,52 +219,29 @@ CREATE TABLE IF NOT EXISTS sessions (
); );
-- FTS5 Full-Text Search Support -- FTS5 Full-Text Search Support
-- Create FTS5 virtual table with porter stemming for word-based searches -- Optimized FTS5 virtual table with advanced configuration for millions of notes
CREATE VIRTUAL TABLE notes_fts USING fts5( CREATE VIRTUAL TABLE notes_fts USING fts5(
noteId UNINDEXED, noteId UNINDEXED,
title, title,
content, content,
tokenize = 'porter unicode61' tokenize = 'porter unicode61',
prefix = '2 3 4', -- Index prefixes of 2, 3, and 4 characters for faster prefix searches
columnsize = 0, -- Reduce index size by not storing column sizes (saves ~25% space)
detail = full -- Keep full detail for snippet generation
); );
-- Create FTS5 virtual table with trigram tokenizer for substring searches -- Optimized triggers to keep FTS table synchronized with notes
CREATE VIRTUAL TABLE notes_fts_trigram USING fts5( -- Consolidated from 7 triggers to 4 for better performance and maintainability
noteId UNINDEXED,
title,
content,
tokenize = 'trigram',
detail = 'none'
);
-- Triggers to keep FTS table synchronized with notes -- Smart trigger for INSERT operations on notes
-- IMPORTANT: These triggers must handle all SQL operations including: -- Handles: INSERT, INSERT OR REPLACE, INSERT OR IGNORE, and upsert scenarios
-- - Regular INSERT/UPDATE/DELETE
-- - INSERT OR REPLACE
-- - INSERT ... ON CONFLICT ... DO UPDATE (upsert)
-- - Cases where notes are created before blobs (import scenarios)
-- Trigger for INSERT operations on notes
-- Handles: INSERT, INSERT OR REPLACE, INSERT OR IGNORE, and the INSERT part of upsert
CREATE TRIGGER notes_fts_insert CREATE TRIGGER notes_fts_insert
AFTER INSERT ON notes AFTER INSERT ON notes
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND NEW.isDeleted = 0 AND NEW.isDeleted = 0
AND NEW.isProtected = 0 AND NEW.isProtected = 0
BEGIN BEGIN
-- First delete any existing FTS entries (in case of INSERT OR REPLACE) INSERT OR REPLACE INTO notes_fts (noteId, title, content)
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
-- Then insert the new entry into both FTS tables
INSERT INTO notes_fts (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
INSERT INTO notes_fts_trigram (noteId, title, content)
SELECT SELECT
NEW.noteId, NEW.noteId,
NEW.title, NEW.title,
@@ -273,47 +250,35 @@ BEGIN
LEFT JOIN blobs b ON b.blobId = NEW.blobId; LEFT JOIN blobs b ON b.blobId = NEW.blobId;
END; END;
-- Trigger for UPDATE operations on notes table -- Smart trigger for UPDATE operations on notes table
-- Handles: Regular UPDATE and the UPDATE part of upsert (ON CONFLICT DO UPDATE) -- Only fires when relevant fields actually change to reduce unnecessary work
-- Fires for ANY update to searchable notes to ensure FTS stays in sync
CREATE TRIGGER notes_fts_update CREATE TRIGGER notes_fts_update
AFTER UPDATE ON notes AFTER UPDATE ON notes
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') WHEN (OLD.title != NEW.title OR OLD.type != NEW.type OR OLD.blobId != NEW.blobId OR
-- Fire on any change, not just specific columns, to handle all upsert scenarios OLD.isDeleted != NEW.isDeleted OR OLD.isProtected != NEW.isProtected)
AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
BEGIN BEGIN
-- Always delete the old entries from both FTS tables -- Remove old entry
DELETE FROM notes_fts WHERE noteId = NEW.noteId; DELETE FROM notes_fts WHERE noteId = NEW.noteId;
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
-- Insert new entries into both FTS tables if note is not deleted and not protected -- Add new entry if eligible
INSERT INTO notes_fts (noteId, title, content) INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId
WHERE NEW.isDeleted = 0
AND NEW.isProtected = 0;
INSERT INTO notes_fts_trigram (noteId, title, content)
SELECT SELECT
NEW.noteId, NEW.noteId,
NEW.title, NEW.title,
COALESCE(b.content, '') COALESCE(b.content, '')
FROM (SELECT NEW.noteId) AS note_select FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId LEFT JOIN blobs b ON b.blobId = NEW.blobId
WHERE NEW.isDeleted = 0 WHERE NEW.isDeleted = 0 AND NEW.isProtected = 0;
AND NEW.isProtected = 0;
END; END;
-- Trigger for UPDATE operations on blobs -- Smart trigger for UPDATE operations on blobs
-- Handles: Regular UPDATE and the UPDATE part of upsert (ON CONFLICT DO UPDATE) -- Only fires when content actually changes
-- IMPORTANT: Uses INSERT OR REPLACE for efficiency with deduplicated blobs
CREATE TRIGGER notes_fts_blob_update CREATE TRIGGER notes_fts_blob_update
AFTER UPDATE ON blobs AFTER UPDATE ON blobs
WHEN OLD.content != NEW.content
BEGIN BEGIN
-- Update both FTS tables for all notes sharing this blob -- Update FTS table for all notes sharing this blob
INSERT OR REPLACE INTO notes_fts (noteId, title, content) INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT SELECT
n.noteId, n.noteId,
@@ -324,100 +289,11 @@ BEGIN
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0 AND n.isDeleted = 0
AND n.isProtected = 0; AND n.isProtected = 0;
INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content)
SELECT
n.noteId,
n.title,
NEW.content
FROM notes n
WHERE n.blobId = NEW.blobId
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0;
END; END;
-- Trigger for DELETE operations -- Trigger for DELETE operations (handles both hard delete and cleanup)
CREATE TRIGGER notes_fts_delete CREATE TRIGGER notes_fts_delete
AFTER DELETE ON notes AFTER DELETE ON notes
BEGIN BEGIN
DELETE FROM notes_fts WHERE noteId = OLD.noteId; DELETE FROM notes_fts WHERE noteId = OLD.noteId;
DELETE FROM notes_fts_trigram WHERE noteId = OLD.noteId;
END;
-- Trigger for soft delete (isDeleted = 1)
CREATE TRIGGER notes_fts_soft_delete
AFTER UPDATE ON notes
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
BEGIN
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
END;
-- Trigger for notes becoming protected
-- Remove from FTS when a note becomes protected
CREATE TRIGGER notes_fts_protect
AFTER UPDATE ON notes
WHEN OLD.isProtected = 0 AND NEW.isProtected = 1
BEGIN
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
END;
-- Trigger for notes becoming unprotected
-- Add to FTS when a note becomes unprotected (if eligible)
CREATE TRIGGER notes_fts_unprotect
AFTER UPDATE ON notes
WHEN OLD.isProtected = 1 AND NEW.isProtected = 0
AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND NEW.isDeleted = 0
BEGIN
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
INSERT INTO notes_fts (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '')
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
INSERT INTO notes_fts_trigram (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '')
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
END;
-- Trigger for INSERT operations on blobs
-- Handles: INSERT, INSERT OR REPLACE, and the INSERT part of upsert
-- Updates all notes that reference this blob (common during import and deduplication)
CREATE TRIGGER notes_fts_blob_insert
AFTER INSERT ON blobs
BEGIN
-- Update both FTS tables for all notes that reference this blob
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT
n.noteId,
n.title,
NEW.content
FROM notes n
WHERE n.blobId = NEW.blobId
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0;
INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content)
SELECT
n.noteId,
n.title,
NEW.content
FROM notes n
WHERE n.blobId = NEW.blobId
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0;
END; END;

View File

@@ -17,7 +17,18 @@ export default function addFTS5SearchAndPerformanceIndexes() {
// Create FTS5 virtual table with porter tokenizer // Create FTS5 virtual table with porter tokenizer
log.info("Creating FTS5 virtual table..."); log.info("Creating FTS5 virtual table...");
// Set optimal SQLite pragmas for FTS5 operations with millions of notes
sql.executeScript(` sql.executeScript(`
-- Memory and performance pragmas for large-scale FTS operations
PRAGMA cache_size = -262144; -- 256MB cache for better performance
PRAGMA temp_store = MEMORY; -- Use RAM for temporary storage
PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O
PRAGMA synchronous = NORMAL; -- Faster writes with good safety
PRAGMA journal_mode = WAL; -- Write-ahead logging for better concurrency
PRAGMA wal_autocheckpoint = 1000; -- Auto-checkpoint every 1000 pages
PRAGMA automatic_index = ON; -- Allow automatic indexes
PRAGMA threads = 4; -- Use multiple threads for sorting
-- Drop existing FTS tables if they exist -- Drop existing FTS tables if they exist
DROP TABLE IF EXISTS notes_fts; DROP TABLE IF EXISTS notes_fts;
DROP TABLE IF EXISTS notes_fts_trigram; DROP TABLE IF EXISTS notes_fts_trigram;
@@ -25,25 +36,26 @@ export default function addFTS5SearchAndPerformanceIndexes() {
DROP TABLE IF EXISTS notes_fts_stats; DROP TABLE IF EXISTS notes_fts_stats;
DROP TABLE IF EXISTS notes_fts_aux; DROP TABLE IF EXISTS notes_fts_aux;
-- Create FTS5 virtual table with porter tokenizer for stemming -- Create optimized FTS5 virtual table for millions of notes
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5( CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
noteId UNINDEXED, noteId UNINDEXED,
title, title,
content, content,
tokenize = 'porter unicode61', tokenize = 'porter unicode61',
prefix = '2 3' -- Index prefixes of 2 and 3 characters for faster prefix searches prefix = '2 3 4', -- Index prefixes of 2, 3, and 4 characters for faster prefix searches
columnsize = 0, -- Reduce index size by not storing column sizes (saves ~25% space)
detail = full -- Keep full detail for snippet generation
); );
`); `);
log.info("Populating FTS5 table with existing note content..."); log.info("Populating FTS5 table with existing note content...");
// Populate the FTS table with existing notes // Optimized population with batch inserts and better memory management
const batchSize = 1000; const batchSize = 5000; // Larger batch size for better performance
let processedCount = 0; let processedCount = 0;
try { try {
sql.transactional(() => { // Count eligible notes first
// Count eligible notes
const totalNotes = sql.getValue<number>(` const totalNotes = sql.getValue<number>(`
SELECT COUNT(*) SELECT COUNT(*)
FROM notes n FROM notes n
@@ -56,11 +68,18 @@ export default function addFTS5SearchAndPerformanceIndexes() {
log.info(`Found ${totalNotes} notes to index`); log.info(`Found ${totalNotes} notes to index`);
// Insert notes in batches // Process in optimized batches using a prepared statement
sql.transactional(() => {
// Prepare statement for batch inserts
const insertStmt = sql.prepare(`
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
VALUES (?, ?, ?)
`);
let offset = 0; let offset = 0;
while (offset < totalNotes) { while (offset < totalNotes) {
sql.execute(` // Fetch batch of notes
INSERT INTO notes_fts (noteId, title, content) const notesBatch = sql.getRows<{noteId: string, title: string, content: string}>(`
SELECT SELECT
n.noteId, n.noteId,
n.title, n.title,
@@ -75,13 +94,31 @@ export default function addFTS5SearchAndPerformanceIndexes() {
LIMIT ? OFFSET ? LIMIT ? OFFSET ?
`, [batchSize, offset]); `, [batchSize, offset]);
offset += batchSize; if (!notesBatch || notesBatch.length === 0) {
processedCount = Math.min(offset, totalNotes); break;
}
if (processedCount % 10000 === 0) { // Batch insert using prepared statement
log.info(`Indexed ${processedCount} of ${totalNotes} notes...`); for (const note of notesBatch) {
insertStmt.run(note.noteId, note.title, note.content);
}
offset += notesBatch.length;
processedCount += notesBatch.length;
// Progress reporting every 10k notes
if (processedCount % 10000 === 0 || processedCount === totalNotes) {
log.info(`Indexed ${processedCount} of ${totalNotes} notes (${Math.round((processedCount / totalNotes) * 100)}%)...`);
}
// Early exit if we processed fewer notes than batch size
if (notesBatch.length < batchSize) {
break;
} }
} }
// Finalize prepared statement
insertStmt.finalize();
}); });
} catch (error) { } catch (error) {
log.error(`Failed to populate FTS index: ${error}`); log.error(`Failed to populate FTS index: ${error}`);
@@ -106,7 +143,7 @@ export default function addFTS5SearchAndPerformanceIndexes() {
sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`); sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`);
} }
// Create triggers for notes table operations // Create optimized triggers for notes table operations
sql.execute(` sql.execute(`
CREATE TRIGGER notes_fts_insert CREATE TRIGGER notes_fts_insert
AFTER INSERT ON notes AFTER INSERT ON notes
@@ -114,7 +151,8 @@ export default function addFTS5SearchAndPerformanceIndexes() {
AND NEW.isDeleted = 0 AND NEW.isDeleted = 0
AND NEW.isProtected = 0 AND NEW.isProtected = 0
BEGIN BEGIN
INSERT INTO notes_fts (noteId, title, content) -- Use INSERT OR REPLACE for better handling of duplicate entries
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT SELECT
NEW.noteId, NEW.noteId,
NEW.title, NEW.title,
@@ -127,12 +165,20 @@ export default function addFTS5SearchAndPerformanceIndexes() {
sql.execute(` sql.execute(`
CREATE TRIGGER notes_fts_update CREATE TRIGGER notes_fts_update
AFTER UPDATE ON notes AFTER UPDATE ON notes
WHEN (
-- Only fire when relevant fields change or status changes
OLD.title != NEW.title OR
OLD.type != NEW.type OR
OLD.blobId != NEW.blobId OR
OLD.isDeleted != NEW.isDeleted OR
OLD.isProtected != NEW.isProtected
)
BEGIN BEGIN
-- Delete old entry -- Always remove old entry first
DELETE FROM notes_fts WHERE noteId = OLD.noteId; DELETE FROM notes_fts WHERE noteId = OLD.noteId;
-- Insert new entry if eligible -- Insert new entry if eligible (avoid redundant work)
INSERT INTO notes_fts (noteId, title, content) INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT SELECT
NEW.noteId, NEW.noteId,
NEW.title, NEW.title,
@@ -153,19 +199,14 @@ export default function addFTS5SearchAndPerformanceIndexes() {
END; END;
`); `);
// Create triggers for blob updates // Create optimized triggers for blob updates
sql.execute(` sql.execute(`
CREATE TRIGGER blobs_fts_update CREATE TRIGGER blobs_fts_update
AFTER UPDATE ON blobs AFTER UPDATE ON blobs
WHEN OLD.content != NEW.content -- Only fire when content actually changes
BEGIN BEGIN
-- Update all notes that reference this blob -- Use efficient INSERT OR REPLACE to update all notes referencing this blob
DELETE FROM notes_fts INSERT OR REPLACE INTO notes_fts (noteId, title, content)
WHERE noteId IN (
SELECT noteId FROM notes
WHERE blobId = NEW.blobId
);
INSERT INTO notes_fts (noteId, title, content)
SELECT SELECT
n.noteId, n.noteId,
n.title, n.title,
@@ -182,7 +223,8 @@ export default function addFTS5SearchAndPerformanceIndexes() {
CREATE TRIGGER blobs_fts_insert CREATE TRIGGER blobs_fts_insert
AFTER INSERT ON blobs AFTER INSERT ON blobs
BEGIN BEGIN
INSERT INTO notes_fts (noteId, title, content) -- Use INSERT OR REPLACE to handle potential race conditions
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT SELECT
n.noteId, n.noteId,
n.title, n.title,
@@ -201,16 +243,31 @@ export default function addFTS5SearchAndPerformanceIndexes() {
log.info("Optimizing FTS5 index..."); log.info("Optimizing FTS5 index...");
sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`);
// Set essential SQLite pragmas for better performance // Set comprehensive SQLite pragmas optimized for millions of notes
log.info("Configuring SQLite pragmas for large-scale FTS performance...");
sql.executeScript(` sql.executeScript(`
-- Increase cache size (50MB) -- Memory Management (Critical for large databases)
PRAGMA cache_size = -50000; PRAGMA cache_size = -262144; -- 256MB cache (was 50MB) - critical for FTS performance
PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices
PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance
-- Use memory for temp storage -- Write Optimization (Important for batch operations)
PRAGMA temp_store = 2; PRAGMA synchronous = NORMAL; -- Balance between safety and performance (was FULL)
PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency
PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management
-- Run ANALYZE on FTS tables -- Query Optimization (Essential for FTS queries)
PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes
PRAGMA optimize; -- Update query planner statistics
-- FTS-Specific Optimizations
PRAGMA threads = 4; -- Use multiple threads for FTS operations (if available)
-- Run comprehensive ANALYZE on all FTS-related tables
ANALYZE notes_fts; ANALYZE notes_fts;
ANALYZE notes;
ANALYZE blobs;
`); `);
log.info("FTS5 migration completed successfully"); log.info("FTS5 migration completed successfully");

View File

@@ -81,18 +81,7 @@ class NoteContentFulltextExp extends Expression {
// Try to use FTS5 if available for better performance // Try to use FTS5 if available for better performance
if (ftsSearchService.checkFTS5Availability() && this.canUseFTS5()) { if (ftsSearchService.checkFTS5Availability() && this.canUseFTS5()) {
try { try {
// Performance comparison logging for FTS5 vs traditional search // Use FTS5 for optimized search
const searchQuery = this.tokens.join(" ");
const isQuickSearch = searchContext.fastSearch === false; // quick-search sets fastSearch to false
if (isQuickSearch) {
log.info(`[QUICK-SEARCH-COMPARISON] Starting comparison for query: "${searchQuery}" with operator: ${this.operator}`);
}
// Check if we need to search protected notes
const searchProtected = protectedSessionService.isProtectedSessionAvailable();
// Time FTS5 search
const ftsStartTime = Date.now();
const noteIdSet = inputNoteSet.getNoteIds(); const noteIdSet = inputNoteSet.getNoteIds();
const ftsResults = ftsSearchService.searchSync( const ftsResults = ftsSearchService.searchSync(
this.tokens, this.tokens,
@@ -103,8 +92,6 @@ class NoteContentFulltextExp extends Expression {
searchProtected: false // FTS5 doesn't index protected notes searchProtected: false // FTS5 doesn't index protected notes
} }
); );
const ftsEndTime = Date.now();
const ftsTime = ftsEndTime - ftsStartTime;
// Add FTS results to note set // Add FTS results to note set
for (const result of ftsResults) { for (const result of ftsResults) {
@@ -113,53 +100,8 @@ class NoteContentFulltextExp extends Expression {
} }
} }
// For quick-search, also run traditional search for comparison
if (isQuickSearch) {
const traditionalStartTime = Date.now();
// Log the input set size for debugging
log.info(`[QUICK-SEARCH-COMPARISON] Input set size: ${inputNoteSet.notes.length} notes`);
// Run traditional search for comparison
// Use the dedicated comparison method that always runs the full search
const traditionalResults = this.executeTraditionalSearch(inputNoteSet, searchContext);
const traditionalEndTime = Date.now();
const traditionalTime = traditionalEndTime - traditionalStartTime;
// Log performance comparison
const speedup = traditionalTime > 0 ? (traditionalTime / ftsTime).toFixed(2) : "N/A";
log.info(`[QUICK-SEARCH-COMPARISON] ===== Results for query: "${searchQuery}" =====`);
log.info(`[QUICK-SEARCH-COMPARISON] FTS5 search: ${ftsTime}ms, found ${ftsResults.length} results`);
log.info(`[QUICK-SEARCH-COMPARISON] Traditional search: ${traditionalTime}ms, found ${traditionalResults.notes.length} results`);
log.info(`[QUICK-SEARCH-COMPARISON] FTS5 is ${speedup}x faster (saved ${traditionalTime - ftsTime}ms)`);
// Check if results match
const ftsNoteIds = new Set(ftsResults.map(r => r.noteId));
const traditionalNoteIds = new Set(traditionalResults.notes.map(n => n.noteId));
const matchingResults = ftsNoteIds.size === traditionalNoteIds.size &&
Array.from(ftsNoteIds).every(id => traditionalNoteIds.has(id));
if (!matchingResults) {
log.info(`[QUICK-SEARCH-COMPARISON] Results differ! FTS5: ${ftsNoteIds.size} notes, Traditional: ${traditionalNoteIds.size} notes`);
// Find differences
const onlyInFTS = Array.from(ftsNoteIds).filter(id => !traditionalNoteIds.has(id));
const onlyInTraditional = Array.from(traditionalNoteIds).filter(id => !ftsNoteIds.has(id));
if (onlyInFTS.length > 0) {
log.info(`[QUICK-SEARCH-COMPARISON] Only in FTS5: ${onlyInFTS.slice(0, 5).join(", ")}${onlyInFTS.length > 5 ? "..." : ""}`);
}
if (onlyInTraditional.length > 0) {
log.info(`[QUICK-SEARCH-COMPARISON] Only in Traditional: ${onlyInTraditional.slice(0, 5).join(", ")}${onlyInTraditional.length > 5 ? "..." : ""}`);
}
} else {
log.info(`[QUICK-SEARCH-COMPARISON] Results match perfectly! ✓`);
}
log.info(`[QUICK-SEARCH-COMPARISON] ========================================`);
}
// If we need to search protected notes, use the separate method // If we need to search protected notes, use the separate method
const searchProtected = protectedSessionService.isProtectedSessionAvailable();
if (searchProtected) { if (searchProtected) {
const protectedResults = ftsSearchService.searchProtectedNotesSync( const protectedResults = ftsSearchService.searchProtectedNotesSync(
this.tokens, this.tokens,
@@ -258,24 +200,6 @@ class NoteContentFulltextExp extends Expression {
return resultNoteSet; return resultNoteSet;
} }
/**
* Executes traditional search for comparison purposes
* This always runs the full traditional search regardless of operator
*/
private executeTraditionalSearch(inputNoteSet: NoteSet, searchContext: SearchContext): NoteSet {
const resultNoteSet = new NoteSet();
for (const row of sql.iterateRows<SearchRow>(`
SELECT noteId, type, mime, content, isProtected
FROM notes JOIN blobs USING (blobId)
WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND isDeleted = 0
AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
this.findInText(row, inputNoteSet, resultNoteSet);
}
return resultNoteSet;
}
findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) { findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) {
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) { if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {

View File

@@ -34,6 +34,7 @@ describe('FTS5 Search Service', () => {
getRows: vi.fn(), getRows: vi.fn(),
getColumn: vi.fn(), getColumn: vi.fn(),
execute: vi.fn(), execute: vi.fn(),
prepare: vi.fn(),
iterateRows: vi.fn(), iterateRows: vi.fn(),
transactional: vi.fn((fn: Function) => fn()) transactional: vi.fn((fn: Function) => fn())
}; };
@@ -253,10 +254,19 @@ describe('FTS5 Search Service', () => {
]; ];
mockSql.getRows.mockReturnValue(missingNotes); mockSql.getRows.mockReturnValue(missingNotes);
// Mock prepared statement
const mockPreparedStatement = {
run: vi.fn(),
finalize: vi.fn()
};
mockSql.prepare.mockReturnValue(mockPreparedStatement);
const count = ftsSearchService.syncMissingNotes(); const count = ftsSearchService.syncMissingNotes();
expect(count).toBe(2); expect(count).toBe(2);
expect(mockSql.execute).toHaveBeenCalledTimes(2); expect(mockSql.prepare).toHaveBeenCalledTimes(1);
expect(mockPreparedStatement.run).toHaveBeenCalledTimes(2);
expect(mockPreparedStatement.finalize).toHaveBeenCalledTimes(1);
}); });
it('should optimize index', () => { it('should optimize index', () => {

View File

@@ -70,15 +70,30 @@ const FTS_CONFIG = {
*/ */
class FTSSearchService { class FTSSearchService {
private isFTS5Available: boolean | null = null; private isFTS5Available: boolean | null = null;
private checkingAvailability = false;
/** /**
* Check if FTS5 is available and properly configured * Check if FTS5 is available and properly configured
* The result is cached after the first check. NOTE(review): the checkingAvailability guard is a plain boolean flag, not a real synchronization primitive
*/ */
checkFTS5Availability(): boolean { checkFTS5Availability(): boolean {
// Return cached result if available
if (this.isFTS5Available !== null) { if (this.isFTS5Available !== null) {
return this.isFTS5Available; return this.isFTS5Available;
} }
// Prevent concurrent checks
if (this.checkingAvailability) {
// Busy-wait until the ongoing check finishes (the loop below polls continuously, without any delay)
while (this.checkingAvailability && this.isFTS5Available === null) {
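// This is a simple spin-wait; in a real async context, you'd use proper synchronization
// NOTE(review): the loop blocks the caller, so if nothing else can run to clear the flag it never exits - confirm this branch is reachable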
continue;
}
return this.isFTS5Available ?? false;
}
this.checkingAvailability = true;
try { try {
// Check if FTS5 extension is available // Check if FTS5 extension is available
const result = sql.getRow(` const result = sql.getRow(`
@@ -101,6 +116,8 @@ class FTSSearchService {
if (!this.isFTS5Available) { if (!this.isFTS5Available) {
log.info("FTS5 table not found, full-text search not available"); log.info("FTS5 table not found, full-text search not available");
} else {
log.info("FTS5 full-text search is available and configured");
} }
return this.isFTS5Available; return this.isFTS5Available;
@@ -108,6 +125,8 @@ class FTSSearchService {
log.error(`Error checking FTS5 availability: ${error}`); log.error(`Error checking FTS5 availability: ${error}`);
this.isFTS5Available = false; this.isFTS5Available = false;
return false; return false;
} finally {
this.checkingAvailability = false;
} }
} }
@@ -268,14 +287,19 @@ class FTSSearchService {
return 0; return 0;
} }
// Insert missing notes in batches // Insert missing notes using efficient batch processing
sql.transactional(() => { sql.transactional(() => {
for (const note of missingNotes) { // Use prepared statement for better performance
sql.execute(` const insertStmt = sql.prepare(`
INSERT INTO notes_fts (noteId, title, content) INSERT OR REPLACE INTO notes_fts (noteId, title, content)
VALUES (?, ?, ?) VALUES (?, ?, ?)
`, [note.noteId, note.title, note.content]); `);
for (const note of missingNotes) {
insertStmt.run(note.noteId, note.title, note.content);
} }
insertStmt.finalize();
}); });
log.info(`Synced ${missingNotes.length} missing notes to FTS index`); log.info(`Synced ${missingNotes.length} missing notes to FTS index`);

View File

@@ -44,6 +44,9 @@ async function initDbConnection() {
await migrationService.migrateIfNecessary(); await migrationService.migrateIfNecessary();
// Initialize optimized SQLite pragmas for FTS and large database performance
initializeFTSPragmas();
sql.execute('CREATE TEMP TABLE "param_list" (`paramId` TEXT NOT NULL PRIMARY KEY)'); sql.execute('CREATE TEMP TABLE "param_list" (`paramId` TEXT NOT NULL PRIMARY KEY)');
sql.execute(` sql.execute(`
@@ -185,6 +188,42 @@ function setDbAsInitialized() {
} }
} }
/**
* Initialize SQLite pragmas optimized for FTS5 and large databases
*/
function initializeFTSPragmas() {
if (config.General.readOnly) {
return;
}
try {
log.info("Setting SQLite pragmas for FTS5 and large database optimization...");
sql.executeScript(`
-- Memory Management (Critical for FTS performance with millions of notes)
PRAGMA cache_size = -262144; -- 256MB cache for better query performance
PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices
PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance
-- Write Optimization (Better for concurrent operations)
PRAGMA synchronous = NORMAL; -- Balance safety and performance (FULL is too slow for large operations)
PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency
PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management
-- Query Optimization (Essential for complex FTS queries)
PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes when beneficial
-- FTS-Specific Optimizations
PRAGMA threads = 4; -- Use multiple threads for FTS operations if available
`);
log.info("FTS pragmas initialized successfully");
} catch (error) {
log.error(`Failed to initialize FTS pragmas: ${error}`);
// Don't throw - continue with default settings
}
}
function optimize() { function optimize() {
if (config.General.readOnly) { if (config.General.readOnly) {
return; return;