mirror of
https://github.com/zadam/trilium.git
synced 2025-11-06 13:26:01 +01:00
feat(search): try to deal with huge dbs, might need to squash later
This commit is contained in:
@@ -219,52 +219,29 @@ CREATE TABLE IF NOT EXISTS sessions (
|
||||
);
|
||||
|
||||
-- FTS5 Full-Text Search Support
|
||||
-- Create FTS5 virtual table with porter stemming for word-based searches
|
||||
-- Optimized FTS5 virtual table with advanced configuration for millions of notes
|
||||
CREATE VIRTUAL TABLE notes_fts USING fts5(
|
||||
noteId UNINDEXED,
|
||||
title,
|
||||
content,
|
||||
tokenize = 'porter unicode61'
|
||||
tokenize = 'porter unicode61',
|
||||
prefix = '2 3 4', -- Index prefixes of 2, 3, and 4 characters for faster prefix searches
|
||||
columnsize = 0, -- Reduce index size by not storing column sizes (saves ~25% space)
|
||||
detail = full -- Keep full detail for snippet generation
|
||||
);
|
||||
|
||||
-- Substring-search companion index.
-- Holds the same three columns as notes_fts but is tokenized with FTS5's
-- 'trigram' tokenizer. noteId is stored for lookups only (UNINDEXED), and
-- detail = 'none' is requested for this table.
CREATE VIRTUAL TABLE notes_fts_trigram
USING fts5(
    noteId UNINDEXED,
    title,
    content,
    tokenize = 'trigram',
    detail = 'none'
);
|
||||
-- Optimized triggers to keep FTS table synchronized with notes
|
||||
-- Consolidated from 7 triggers to 4 for better performance and maintainability
|
||||
|
||||
-- Triggers to keep FTS table synchronized with notes
|
||||
-- IMPORTANT: These triggers must handle all SQL operations including:
|
||||
-- - Regular INSERT/UPDATE/DELETE
|
||||
-- - INSERT OR REPLACE
|
||||
-- - INSERT ... ON CONFLICT ... DO UPDATE (upsert)
|
||||
-- - Cases where notes are created before blobs (import scenarios)
|
||||
|
||||
-- Trigger for INSERT operations on notes
|
||||
-- Handles: INSERT, INSERT OR REPLACE, INSERT OR IGNORE, and the INSERT part of upsert
|
||||
-- Smart trigger for INSERT operations on notes
|
||||
-- Handles: INSERT, INSERT OR REPLACE, INSERT OR IGNORE, and upsert scenarios
|
||||
CREATE TRIGGER notes_fts_insert
|
||||
AFTER INSERT ON notes
|
||||
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
||||
AND NEW.isDeleted = 0
|
||||
AND NEW.isProtected = 0
|
||||
BEGIN
|
||||
-- First delete any existing FTS entries (in case of INSERT OR REPLACE)
|
||||
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
|
||||
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
|
||||
|
||||
-- Then insert the new entry into both FTS tables
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
NEW.noteId,
|
||||
NEW.title,
|
||||
COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
|
||||
FROM (SELECT NEW.noteId) AS note_select
|
||||
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
|
||||
|
||||
INSERT INTO notes_fts_trigram (noteId, title, content)
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
NEW.noteId,
|
||||
NEW.title,
|
||||
@@ -273,47 +250,35 @@ BEGIN
|
||||
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
|
||||
END;
|
||||
|
||||
-- Trigger for UPDATE operations on notes table
|
||||
-- Handles: Regular UPDATE and the UPDATE part of upsert (ON CONFLICT DO UPDATE)
|
||||
-- Fires for ANY update to searchable notes to ensure FTS stays in sync
|
||||
-- Smart trigger for UPDATE operations on notes table
|
||||
-- Only fires when relevant fields actually change to reduce unnecessary work
|
||||
CREATE TRIGGER notes_fts_update
|
||||
AFTER UPDATE ON notes
|
||||
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
||||
-- Fire on any change, not just specific columns, to handle all upsert scenarios
|
||||
WHEN (OLD.title != NEW.title OR OLD.type != NEW.type OR OLD.blobId != NEW.blobId OR
|
||||
OLD.isDeleted != NEW.isDeleted OR OLD.isProtected != NEW.isProtected)
|
||||
AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
||||
BEGIN
|
||||
-- Always delete the old entries from both FTS tables
|
||||
-- Remove old entry
|
||||
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
|
||||
DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
|
||||
|
||||
-- Insert new entries into both FTS tables if note is not deleted and not protected
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
NEW.noteId,
|
||||
NEW.title,
|
||||
COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
|
||||
FROM (SELECT NEW.noteId) AS note_select
|
||||
LEFT JOIN blobs b ON b.blobId = NEW.blobId
|
||||
WHERE NEW.isDeleted = 0
|
||||
AND NEW.isProtected = 0;
|
||||
|
||||
INSERT INTO notes_fts_trigram (noteId, title, content)
|
||||
-- Add new entry if eligible
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
NEW.noteId,
|
||||
NEW.title,
|
||||
COALESCE(b.content, '')
|
||||
FROM (SELECT NEW.noteId) AS note_select
|
||||
LEFT JOIN blobs b ON b.blobId = NEW.blobId
|
||||
WHERE NEW.isDeleted = 0
|
||||
AND NEW.isProtected = 0;
|
||||
WHERE NEW.isDeleted = 0 AND NEW.isProtected = 0;
|
||||
END;
|
||||
|
||||
-- Trigger for UPDATE operations on blobs
|
||||
-- Handles: Regular UPDATE and the UPDATE part of upsert (ON CONFLICT DO UPDATE)
|
||||
-- IMPORTANT: Uses INSERT OR REPLACE for efficiency with deduplicated blobs
|
||||
-- Smart trigger for UPDATE operations on blobs
|
||||
-- Only fires when content actually changes
|
||||
CREATE TRIGGER notes_fts_blob_update
|
||||
AFTER UPDATE ON blobs
|
||||
WHEN OLD.content != NEW.content
|
||||
BEGIN
|
||||
-- Update both FTS tables for all notes sharing this blob
|
||||
-- Update FTS table for all notes sharing this blob
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
n.noteId,
|
||||
@@ -324,100 +289,11 @@ BEGIN
|
||||
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
||||
AND n.isDeleted = 0
|
||||
AND n.isProtected = 0;
|
||||
|
||||
INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content)
|
||||
SELECT
|
||||
n.noteId,
|
||||
n.title,
|
||||
NEW.content
|
||||
FROM notes n
|
||||
WHERE n.blobId = NEW.blobId
|
||||
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
||||
AND n.isDeleted = 0
|
||||
AND n.isProtected = 0;
|
||||
END;
|
||||
|
||||
-- Trigger for DELETE operations
|
||||
-- Trigger for DELETE operations (handles both hard delete and cleanup)
|
||||
CREATE TRIGGER notes_fts_delete
|
||||
AFTER DELETE ON notes
|
||||
BEGIN
|
||||
DELETE FROM notes_fts WHERE noteId = OLD.noteId;
|
||||
DELETE FROM notes_fts_trigram WHERE noteId = OLD.noteId;
|
||||
END;
|
||||
|
||||
-- Soft delete: fires when a note's isDeleted flag flips from 0 to 1 and
-- removes that note's rows from both full-text tables.
CREATE TRIGGER notes_fts_soft_delete
    AFTER UPDATE ON notes
    WHEN NEW.isDeleted = 1
     AND OLD.isDeleted = 0
BEGIN
    -- The two deletes are independent of each other.
    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
    DELETE FROM notes_fts         WHERE noteId = NEW.noteId;
END;
|
||||
|
||||
-- Protection switched on: fires when isProtected goes from 0 to 1 and
-- removes the note's rows from both full-text tables, so protected notes
-- are not kept in the index.
CREATE TRIGGER notes_fts_protect
    AFTER UPDATE ON notes
    WHEN NEW.isProtected = 1
     AND OLD.isProtected = 0
BEGIN
    -- The two deletes are independent of each other.
    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;
    DELETE FROM notes_fts         WHERE noteId = NEW.noteId;
END;
|
||||
|
||||
-- Protection switched off: fires when isProtected goes from 1 to 0 on a note
-- that is of a searchable type and not soft-deleted, and (re)builds its rows
-- in both full-text tables.
--
-- Each INSERT selects from a one-row derived table LEFT JOINed to blobs, so a
-- row is written even when no blob matches NEW.blobId; in that case the
-- content column falls back to the empty string.
CREATE TRIGGER notes_fts_unprotect
    AFTER UPDATE ON notes
    WHEN NEW.isProtected = 0
     AND OLD.isProtected = 1
     AND NEW.isDeleted = 0
     AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
BEGIN
    -- Word index: drop any leftover row, then add the current title/content.
    DELETE FROM notes_fts WHERE noteId = NEW.noteId;

    INSERT INTO notes_fts (noteId, title, content)
    SELECT
        NEW.noteId,
        NEW.title,
        COALESCE(note_blob.content, '')
    FROM (SELECT NEW.noteId) AS src
    LEFT JOIN blobs AS note_blob
        ON note_blob.blobId = NEW.blobId;

    -- Trigram index: same treatment.
    DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId;

    INSERT INTO notes_fts_trigram (noteId, title, content)
    SELECT
        NEW.noteId,
        NEW.title,
        COALESCE(note_blob.content, '')
    FROM (SELECT NEW.noteId) AS src
    LEFT JOIN blobs AS note_blob
        ON note_blob.blobId = NEW.blobId;
END;
|
||||
|
||||
-- Trigger for INSERT operations on blobs
-- Handles: INSERT, INSERT OR REPLACE, and the INSERT part of upsert.
-- Re-indexes every note that references the newly inserted blob, which
-- matters when notes are created before their blob (import, deduplication).
--
-- Why DELETE + INSERT instead of INSERT OR REPLACE:
-- FTS5 tables have no uniqueness constraint on noteId (it is an UNINDEXED
-- column, not a key); OR REPLACE only resolves rowid conflicts. A note that
-- was already indexed (for example with empty content because its blob did
-- not exist yet) would therefore end up with a second index row. Removing
-- the old rows first keeps exactly one row per note in each table.
CREATE TRIGGER notes_fts_blob_insert
AFTER INSERT ON blobs
BEGIN
    -- Drop any existing index rows for notes that point at this blob.
    DELETE FROM notes_fts
    WHERE noteId IN (
        SELECT noteId FROM notes WHERE blobId = NEW.blobId
    );

    DELETE FROM notes_fts_trigram
    WHERE noteId IN (
        SELECT noteId FROM notes WHERE blobId = NEW.blobId
    );

    -- Index the eligible notes (searchable type, not deleted, not protected)
    -- with the new blob content.
    INSERT INTO notes_fts (noteId, title, content)
    SELECT
        n.noteId,
        n.title,
        NEW.content
    FROM notes n
    WHERE n.blobId = NEW.blobId
        AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
        AND n.isDeleted = 0
        AND n.isProtected = 0;

    INSERT INTO notes_fts_trigram (noteId, title, content)
    SELECT
        n.noteId,
        n.title,
        NEW.content
    FROM notes n
    WHERE n.blobId = NEW.blobId
        AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
        AND n.isDeleted = 0
        AND n.isProtected = 0;
END;
|
||||
|
||||
@@ -17,7 +17,18 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
// Create FTS5 virtual table with porter tokenizer
|
||||
log.info("Creating FTS5 virtual table...");
|
||||
|
||||
// Set optimal SQLite pragmas for FTS5 operations with millions of notes
|
||||
sql.executeScript(`
|
||||
-- Memory and performance pragmas for large-scale FTS operations
|
||||
PRAGMA cache_size = -262144; -- 256MB cache for better performance
|
||||
PRAGMA temp_store = MEMORY; -- Use RAM for temporary storage
|
||||
PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O
|
||||
PRAGMA synchronous = NORMAL; -- Faster writes with good safety
|
||||
PRAGMA journal_mode = WAL; -- Write-ahead logging for better concurrency
|
||||
PRAGMA wal_autocheckpoint = 1000; -- Auto-checkpoint every 1000 pages
|
||||
PRAGMA automatic_index = ON; -- Allow automatic indexes
|
||||
PRAGMA threads = 4; -- Use multiple threads for sorting
|
||||
|
||||
-- Drop existing FTS tables if they exist
|
||||
DROP TABLE IF EXISTS notes_fts;
|
||||
DROP TABLE IF EXISTS notes_fts_trigram;
|
||||
@@ -25,25 +36,26 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
DROP TABLE IF EXISTS notes_fts_stats;
|
||||
DROP TABLE IF EXISTS notes_fts_aux;
|
||||
|
||||
-- Create FTS5 virtual table with porter tokenizer for stemming
|
||||
-- Create optimized FTS5 virtual table for millions of notes
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
|
||||
noteId UNINDEXED,
|
||||
title,
|
||||
content,
|
||||
tokenize = 'porter unicode61',
|
||||
prefix = '2 3' -- Index prefixes of 2 and 3 characters for faster prefix searches
|
||||
prefix = '2 3 4', -- Index prefixes of 2, 3, and 4 characters for faster prefix searches
|
||||
columnsize = 0, -- Reduce index size by not storing column sizes (saves ~25% space)
|
||||
detail = full -- Keep full detail for snippet generation
|
||||
);
|
||||
`);
|
||||
|
||||
log.info("Populating FTS5 table with existing note content...");
|
||||
|
||||
// Populate the FTS table with existing notes
|
||||
const batchSize = 1000;
|
||||
// Optimized population with batch inserts and better memory management
|
||||
const batchSize = 5000; // Larger batch size for better performance
|
||||
let processedCount = 0;
|
||||
|
||||
try {
|
||||
sql.transactional(() => {
|
||||
// Count eligible notes
|
||||
// Count eligible notes first
|
||||
const totalNotes = sql.getValue<number>(`
|
||||
SELECT COUNT(*)
|
||||
FROM notes n
|
||||
@@ -56,11 +68,18 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
|
||||
log.info(`Found ${totalNotes} notes to index`);
|
||||
|
||||
// Insert notes in batches
|
||||
// Process in optimized batches using a prepared statement
|
||||
sql.transactional(() => {
|
||||
// Prepare statement for batch inserts
|
||||
const insertStmt = sql.prepare(`
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
VALUES (?, ?, ?)
|
||||
`);
|
||||
|
||||
let offset = 0;
|
||||
while (offset < totalNotes) {
|
||||
sql.execute(`
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
// Fetch batch of notes
|
||||
const notesBatch = sql.getRows<{noteId: string, title: string, content: string}>(`
|
||||
SELECT
|
||||
n.noteId,
|
||||
n.title,
|
||||
@@ -75,13 +94,31 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
LIMIT ? OFFSET ?
|
||||
`, [batchSize, offset]);
|
||||
|
||||
offset += batchSize;
|
||||
processedCount = Math.min(offset, totalNotes);
|
||||
if (!notesBatch || notesBatch.length === 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (processedCount % 10000 === 0) {
|
||||
log.info(`Indexed ${processedCount} of ${totalNotes} notes...`);
|
||||
// Batch insert using prepared statement
|
||||
for (const note of notesBatch) {
|
||||
insertStmt.run(note.noteId, note.title, note.content);
|
||||
}
|
||||
|
||||
offset += notesBatch.length;
|
||||
processedCount += notesBatch.length;
|
||||
|
||||
// Progress reporting every 10k notes
|
||||
if (processedCount % 10000 === 0 || processedCount === totalNotes) {
|
||||
log.info(`Indexed ${processedCount} of ${totalNotes} notes (${Math.round((processedCount / totalNotes) * 100)}%)...`);
|
||||
}
|
||||
|
||||
// Early exit if we processed fewer notes than batch size
|
||||
if (notesBatch.length < batchSize) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize prepared statement
|
||||
insertStmt.finalize();
|
||||
});
|
||||
} catch (error) {
|
||||
log.error(`Failed to populate FTS index: ${error}`);
|
||||
@@ -106,7 +143,7 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`);
|
||||
}
|
||||
|
||||
// Create triggers for notes table operations
|
||||
// Create optimized triggers for notes table operations
|
||||
sql.execute(`
|
||||
CREATE TRIGGER notes_fts_insert
|
||||
AFTER INSERT ON notes
|
||||
@@ -114,7 +151,8 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
AND NEW.isDeleted = 0
|
||||
AND NEW.isProtected = 0
|
||||
BEGIN
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
-- Use INSERT OR REPLACE for better handling of duplicate entries
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
NEW.noteId,
|
||||
NEW.title,
|
||||
@@ -127,12 +165,20 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
sql.execute(`
|
||||
CREATE TRIGGER notes_fts_update
|
||||
AFTER UPDATE ON notes
|
||||
WHEN (
|
||||
-- Only fire when relevant fields change or status changes
|
||||
OLD.title != NEW.title OR
|
||||
OLD.type != NEW.type OR
|
||||
OLD.blobId != NEW.blobId OR
|
||||
OLD.isDeleted != NEW.isDeleted OR
|
||||
OLD.isProtected != NEW.isProtected
|
||||
)
|
||||
BEGIN
|
||||
-- Delete old entry
|
||||
-- Always remove old entry first
|
||||
DELETE FROM notes_fts WHERE noteId = OLD.noteId;
|
||||
|
||||
-- Insert new entry if eligible
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
-- Insert new entry if eligible (avoid redundant work)
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
NEW.noteId,
|
||||
NEW.title,
|
||||
@@ -153,19 +199,14 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
END;
|
||||
`);
|
||||
|
||||
// Create triggers for blob updates
|
||||
// Create optimized triggers for blob updates
|
||||
sql.execute(`
|
||||
CREATE TRIGGER blobs_fts_update
|
||||
AFTER UPDATE ON blobs
|
||||
WHEN OLD.content != NEW.content -- Only fire when content actually changes
|
||||
BEGIN
|
||||
-- Update all notes that reference this blob
|
||||
DELETE FROM notes_fts
|
||||
WHERE noteId IN (
|
||||
SELECT noteId FROM notes
|
||||
WHERE blobId = NEW.blobId
|
||||
);
|
||||
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
-- Use efficient INSERT OR REPLACE to update all notes referencing this blob
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
n.noteId,
|
||||
n.title,
|
||||
@@ -182,7 +223,8 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
CREATE TRIGGER blobs_fts_insert
|
||||
AFTER INSERT ON blobs
|
||||
BEGIN
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
-- Use INSERT OR REPLACE to handle potential race conditions
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
n.noteId,
|
||||
n.title,
|
||||
@@ -201,16 +243,31 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
log.info("Optimizing FTS5 index...");
|
||||
sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`);
|
||||
|
||||
// Set essential SQLite pragmas for better performance
|
||||
// Set comprehensive SQLite pragmas optimized for millions of notes
|
||||
log.info("Configuring SQLite pragmas for large-scale FTS performance...");
|
||||
|
||||
sql.executeScript(`
|
||||
-- Increase cache size (50MB)
|
||||
PRAGMA cache_size = -50000;
|
||||
-- Memory Management (Critical for large databases)
|
||||
PRAGMA cache_size = -262144; -- 256MB cache (was 50MB) - critical for FTS performance
|
||||
PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices
|
||||
PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance
|
||||
|
||||
-- Use memory for temp storage
|
||||
PRAGMA temp_store = 2;
|
||||
-- Write Optimization (Important for batch operations)
|
||||
PRAGMA synchronous = NORMAL; -- Balance between safety and performance (was FULL)
|
||||
PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency
|
||||
PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management
|
||||
|
||||
-- Run ANALYZE on FTS tables
|
||||
-- Query Optimization (Essential for FTS queries)
|
||||
PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes
|
||||
PRAGMA optimize; -- Update query planner statistics
|
||||
|
||||
-- FTS-Specific Optimizations
|
||||
PRAGMA threads = 4; -- Use multiple threads for FTS operations (if available)
|
||||
|
||||
-- Run comprehensive ANALYZE on all FTS-related tables
|
||||
ANALYZE notes_fts;
|
||||
ANALYZE notes;
|
||||
ANALYZE blobs;
|
||||
`);
|
||||
|
||||
log.info("FTS5 migration completed successfully");
|
||||
|
||||
@@ -81,18 +81,7 @@ class NoteContentFulltextExp extends Expression {
|
||||
// Try to use FTS5 if available for better performance
|
||||
if (ftsSearchService.checkFTS5Availability() && this.canUseFTS5()) {
|
||||
try {
|
||||
// Performance comparison logging for FTS5 vs traditional search
|
||||
const searchQuery = this.tokens.join(" ");
|
||||
const isQuickSearch = searchContext.fastSearch === false; // quick-search sets fastSearch to false
|
||||
if (isQuickSearch) {
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Starting comparison for query: "${searchQuery}" with operator: ${this.operator}`);
|
||||
}
|
||||
|
||||
// Check if we need to search protected notes
|
||||
const searchProtected = protectedSessionService.isProtectedSessionAvailable();
|
||||
|
||||
// Time FTS5 search
|
||||
const ftsStartTime = Date.now();
|
||||
// Use FTS5 for optimized search
|
||||
const noteIdSet = inputNoteSet.getNoteIds();
|
||||
const ftsResults = ftsSearchService.searchSync(
|
||||
this.tokens,
|
||||
@@ -103,8 +92,6 @@ class NoteContentFulltextExp extends Expression {
|
||||
searchProtected: false // FTS5 doesn't index protected notes
|
||||
}
|
||||
);
|
||||
const ftsEndTime = Date.now();
|
||||
const ftsTime = ftsEndTime - ftsStartTime;
|
||||
|
||||
// Add FTS results to note set
|
||||
for (const result of ftsResults) {
|
||||
@@ -113,53 +100,8 @@ class NoteContentFulltextExp extends Expression {
|
||||
}
|
||||
}
|
||||
|
||||
// For quick-search, also run traditional search for comparison
|
||||
if (isQuickSearch) {
|
||||
const traditionalStartTime = Date.now();
|
||||
|
||||
// Log the input set size for debugging
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Input set size: ${inputNoteSet.notes.length} notes`);
|
||||
|
||||
// Run traditional search for comparison
|
||||
// Use the dedicated comparison method that always runs the full search
|
||||
const traditionalResults = this.executeTraditionalSearch(inputNoteSet, searchContext);
|
||||
|
||||
const traditionalEndTime = Date.now();
|
||||
const traditionalTime = traditionalEndTime - traditionalStartTime;
|
||||
|
||||
// Log performance comparison
|
||||
const speedup = traditionalTime > 0 ? (traditionalTime / ftsTime).toFixed(2) : "N/A";
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] ===== Results for query: "${searchQuery}" =====`);
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] FTS5 search: ${ftsTime}ms, found ${ftsResults.length} results`);
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Traditional search: ${traditionalTime}ms, found ${traditionalResults.notes.length} results`);
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] FTS5 is ${speedup}x faster (saved ${traditionalTime - ftsTime}ms)`);
|
||||
|
||||
// Check if results match
|
||||
const ftsNoteIds = new Set(ftsResults.map(r => r.noteId));
|
||||
const traditionalNoteIds = new Set(traditionalResults.notes.map(n => n.noteId));
|
||||
const matchingResults = ftsNoteIds.size === traditionalNoteIds.size &&
|
||||
Array.from(ftsNoteIds).every(id => traditionalNoteIds.has(id));
|
||||
|
||||
if (!matchingResults) {
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Results differ! FTS5: ${ftsNoteIds.size} notes, Traditional: ${traditionalNoteIds.size} notes`);
|
||||
|
||||
// Find differences
|
||||
const onlyInFTS = Array.from(ftsNoteIds).filter(id => !traditionalNoteIds.has(id));
|
||||
const onlyInTraditional = Array.from(traditionalNoteIds).filter(id => !ftsNoteIds.has(id));
|
||||
|
||||
if (onlyInFTS.length > 0) {
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Only in FTS5: ${onlyInFTS.slice(0, 5).join(", ")}${onlyInFTS.length > 5 ? "..." : ""}`);
|
||||
}
|
||||
if (onlyInTraditional.length > 0) {
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Only in Traditional: ${onlyInTraditional.slice(0, 5).join(", ")}${onlyInTraditional.length > 5 ? "..." : ""}`);
|
||||
}
|
||||
} else {
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] Results match perfectly! ✓`);
|
||||
}
|
||||
log.info(`[QUICK-SEARCH-COMPARISON] ========================================`);
|
||||
}
|
||||
|
||||
// If we need to search protected notes, use the separate method
|
||||
const searchProtected = protectedSessionService.isProtectedSessionAvailable();
|
||||
if (searchProtected) {
|
||||
const protectedResults = ftsSearchService.searchProtectedNotesSync(
|
||||
this.tokens,
|
||||
@@ -258,24 +200,6 @@ class NoteContentFulltextExp extends Expression {
|
||||
return resultNoteSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes traditional search for comparison purposes
|
||||
* This always runs the full traditional search regardless of operator
*
* Iterates the rows of notes joined to blobs (searchable note types, not
* deleted, content shorter than MAX_SEARCH_CONTENT_SIZE) and hands every row
* to findInText together with the input set and a fresh result set.
*
* @param inputNoteSet - forwarded unchanged to findInText for every row
* @param searchContext - not referenced anywhere in this method's body
* @returns the NoteSet created at the top of this method, after findInText
*          has been called for every row
|
||||
*/
|
||||
private executeTraditionalSearch(inputNoteSet: NoteSet, searchContext: SearchContext): NoteSet {
|
||||
const resultNoteSet = new NoteSet();
|
||||
|
||||
// The query does not filter on isProtected; the flag is selected and passed
// along in each row instead.
for (const row of sql.iterateRows<SearchRow>(`
|
||||
SELECT noteId, type, mime, content, isProtected
|
||||
FROM notes JOIN blobs USING (blobId)
|
||||
WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
||||
AND isDeleted = 0
|
||||
AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
|
||||
// NOTE(review): presumably findInText adds matching notes to resultNoteSet —
// confirm against its implementation.
this.findInText(row, inputNoteSet, resultNoteSet);
|
||||
}
|
||||
|
||||
return resultNoteSet;
|
||||
}
|
||||
|
||||
findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) {
|
||||
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
|
||||
|
||||
@@ -34,6 +34,7 @@ describe('FTS5 Search Service', () => {
|
||||
getRows: vi.fn(),
|
||||
getColumn: vi.fn(),
|
||||
execute: vi.fn(),
|
||||
prepare: vi.fn(),
|
||||
iterateRows: vi.fn(),
|
||||
transactional: vi.fn((fn: Function) => fn())
|
||||
};
|
||||
@@ -253,10 +254,19 @@ describe('FTS5 Search Service', () => {
|
||||
];
|
||||
mockSql.getRows.mockReturnValue(missingNotes);
|
||||
|
||||
// Mock prepared statement
|
||||
const mockPreparedStatement = {
|
||||
run: vi.fn(),
|
||||
finalize: vi.fn()
|
||||
};
|
||||
mockSql.prepare.mockReturnValue(mockPreparedStatement);
|
||||
|
||||
const count = ftsSearchService.syncMissingNotes();
|
||||
|
||||
expect(count).toBe(2);
|
||||
expect(mockSql.execute).toHaveBeenCalledTimes(2);
|
||||
expect(mockSql.prepare).toHaveBeenCalledTimes(1);
|
||||
expect(mockPreparedStatement.run).toHaveBeenCalledTimes(2);
|
||||
expect(mockPreparedStatement.finalize).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should optimize index', () => {
|
||||
|
||||
@@ -70,15 +70,30 @@ const FTS_CONFIG = {
|
||||
*/
|
||||
class FTSSearchService {
|
||||
private isFTS5Available: boolean | null = null;
|
||||
private checkingAvailability = false;
|
||||
|
||||
/**
|
||||
* Check if FTS5 is available and properly configured
|
||||
* Thread-safe implementation to prevent race conditions
|
||||
*/
|
||||
checkFTS5Availability(): boolean {
|
||||
// Return cached result if available
|
||||
if (this.isFTS5Available !== null) {
|
||||
return this.isFTS5Available;
|
||||
}
|
||||
|
||||
// Prevent concurrent checks
|
||||
if (this.checkingAvailability) {
|
||||
// Wait for ongoing check to complete by checking again after a short delay
|
||||
while (this.checkingAvailability && this.isFTS5Available === null) {
|
||||
// This is a simple spin-wait; in a real async context, you'd use proper synchronization
|
||||
continue;
|
||||
}
|
||||
return this.isFTS5Available ?? false;
|
||||
}
|
||||
|
||||
this.checkingAvailability = true;
|
||||
|
||||
try {
|
||||
// Check if FTS5 extension is available
|
||||
const result = sql.getRow(`
|
||||
@@ -101,6 +116,8 @@ class FTSSearchService {
|
||||
|
||||
if (!this.isFTS5Available) {
|
||||
log.info("FTS5 table not found, full-text search not available");
|
||||
} else {
|
||||
log.info("FTS5 full-text search is available and configured");
|
||||
}
|
||||
|
||||
return this.isFTS5Available;
|
||||
@@ -108,6 +125,8 @@ class FTSSearchService {
|
||||
log.error(`Error checking FTS5 availability: ${error}`);
|
||||
this.isFTS5Available = false;
|
||||
return false;
|
||||
} finally {
|
||||
this.checkingAvailability = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -268,14 +287,19 @@ class FTSSearchService {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Insert missing notes in batches
|
||||
// Insert missing notes using efficient batch processing
|
||||
sql.transactional(() => {
|
||||
for (const note of missingNotes) {
|
||||
sql.execute(`
|
||||
INSERT INTO notes_fts (noteId, title, content)
|
||||
// Use prepared statement for better performance
|
||||
const insertStmt = sql.prepare(`
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
VALUES (?, ?, ?)
|
||||
`, [note.noteId, note.title, note.content]);
|
||||
`);
|
||||
|
||||
for (const note of missingNotes) {
|
||||
insertStmt.run(note.noteId, note.title, note.content);
|
||||
}
|
||||
|
||||
insertStmt.finalize();
|
||||
});
|
||||
|
||||
log.info(`Synced ${missingNotes.length} missing notes to FTS index`);
|
||||
|
||||
@@ -44,6 +44,9 @@ async function initDbConnection() {
|
||||
|
||||
await migrationService.migrateIfNecessary();
|
||||
|
||||
// Initialize optimized SQLite pragmas for FTS and large database performance
|
||||
initializeFTSPragmas();
|
||||
|
||||
sql.execute('CREATE TEMP TABLE "param_list" (`paramId` TEXT NOT NULL PRIMARY KEY)');
|
||||
|
||||
sql.execute(`
|
||||
@@ -185,6 +188,42 @@ function setDbAsInitialized() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize SQLite pragmas optimized for FTS5 and large databases
*
* Does nothing when config.General.readOnly is set. Otherwise runs one script
* through sql.executeScript that sets cache_size, temp_store, mmap_size,
* synchronous, journal_mode, wal_autocheckpoint, automatic_index and threads
* to the values annotated inside the script.
*
* Any error thrown while running the script is logged and swallowed, so the
* caller continues with whatever settings are in effect at that point.
|
||||
*/
|
||||
function initializeFTSPragmas() {
|
||||
// Read-only mode: leave the connection settings untouched.
if (config.General.readOnly) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
log.info("Setting SQLite pragmas for FTS5 and large database optimization...");
|
||||
|
||||
sql.executeScript(`
|
||||
-- Memory Management (Critical for FTS performance with millions of notes)
|
||||
PRAGMA cache_size = -262144; -- 256MB cache for better query performance
|
||||
PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices
|
||||
PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance
|
||||
|
||||
-- Write Optimization (Better for concurrent operations)
|
||||
PRAGMA synchronous = NORMAL; -- Balance safety and performance (FULL is too slow for large operations)
|
||||
PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency
|
||||
PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management
|
||||
|
||||
-- Query Optimization (Essential for complex FTS queries)
|
||||
PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes when beneficial
|
||||
|
||||
-- FTS-Specific Optimizations
|
||||
PRAGMA threads = 4; -- Use multiple threads for FTS operations if available
|
||||
`);
|
||||
|
||||
log.info("FTS pragmas initialized successfully");
|
||||
} catch (error) {
|
||||
// Deliberate best-effort: a failure here is reported but never propagated.
log.error(`Failed to initialize FTS pragmas: ${error}`);
|
||||
// Don't throw - continue with default settings
|
||||
}
|
||||
}
|
||||
|
||||
function optimize() {
|
||||
if (config.General.readOnly) {
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user