From 053f722cb8bcdd8c68af252704cb52fa8df0a5f1 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sun, 31 Aug 2025 03:15:29 +0000 Subject: [PATCH] feat(search): try to get fts search to work in large environments --- apps/server/src/assets/db/schema.sql | 80 +++- .../src/migrations/0234__add_fts5_search.ts | 137 +++++- .../expressions/note_content_fulltext.ts | 28 +- apps/server/src/services/search/fts_search.ts | 317 ++++++++++--- package.json | 1 + scripts/stress-test-native-simple.ts | 370 +++++++++++++++ scripts/stress-test-native.ts | 421 ++++++++++++++++++ 7 files changed, 1251 insertions(+), 103 deletions(-) create mode 100644 scripts/stress-test-native-simple.ts create mode 100644 scripts/stress-test-native.ts diff --git a/apps/server/src/assets/db/schema.sql b/apps/server/src/assets/db/schema.sql index 887701167..f53dc18c3 100644 --- a/apps/server/src/assets/db/schema.sql +++ b/apps/server/src/assets/db/schema.sql @@ -219,7 +219,7 @@ CREATE TABLE IF NOT EXISTS sessions ( ); -- FTS5 Full-Text Search Support --- Create FTS5 virtual table for full-text searching +-- Create FTS5 virtual table with porter stemming for word-based searches CREATE VIRTUAL TABLE notes_fts USING fts5( noteId UNINDEXED, title, @@ -227,6 +227,15 @@ CREATE VIRTUAL TABLE notes_fts USING fts5( tokenize = 'porter unicode61' ); +-- Create FTS5 virtual table with trigram tokenizer for substring searches +CREATE VIRTUAL TABLE notes_fts_trigram USING fts5( + noteId UNINDEXED, + title, + content, + tokenize = 'trigram', + detail = 'none' +); + -- Triggers to keep FTS table synchronized with notes -- IMPORTANT: These triggers must handle all SQL operations including: -- - Regular INSERT/UPDATE/DELETE @@ -242,10 +251,11 @@ WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND NEW.isDeleted = 0 AND NEW.isProtected = 0 BEGIN - -- First delete any existing FTS entry (in case of INSERT OR REPLACE) + -- First delete any existing FTS entries (in case of INSERT OR REPLACE) DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - -- Then insert the new entry, using LEFT JOIN to handle missing blobs + -- Then insert the new entry into both FTS tables INSERT INTO notes_fts (noteId, title, content) SELECT NEW.noteId, @@ -253,6 +263,14 @@ BEGIN COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet FROM (SELECT NEW.noteId) AS note_select LEFT JOIN blobs b ON b.blobId = NEW.blobId; + + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + COALESCE(b.content, '') + FROM (SELECT NEW.noteId) AS note_select + LEFT JOIN blobs b ON b.blobId = NEW.blobId; END; -- Trigger for UPDATE operations on notes table @@ -263,10 +281,11 @@ AFTER UPDATE ON notes WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') -- Fire on any change, not just specific columns, to handle all upsert scenarios BEGIN - -- Always delete the old entry + -- Always delete the old entries from both FTS tables DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - -- Insert new entry if note is not deleted and not protected + -- Insert new entries into both FTS tables if note is not deleted and not protected INSERT INTO notes_fts (noteId, title, content) SELECT NEW.noteId, @@ -276,6 +295,16 @@ BEGIN LEFT JOIN blobs b ON b.blobId = NEW.blobId WHERE NEW.isDeleted = 0 AND NEW.isProtected = 0; + + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + COALESCE(b.content, '') + FROM (SELECT NEW.noteId) AS note_select + LEFT JOIN blobs b ON b.blobId = NEW.blobId + WHERE NEW.isDeleted = 0 + AND NEW.isProtected = 0; END; -- Trigger for UPDATE operations on blobs @@ -284,8 +313,7 @@ END; CREATE TRIGGER notes_fts_blob_update AFTER UPDATE ON blobs BEGIN - -- Use INSERT OR REPLACE for atomic update of all notes sharing this blob - -- This is more efficient than DELETE + INSERT when many notes share the same blob + -- Update both FTS tables for all notes sharing this blob INSERT OR REPLACE INTO notes_fts (noteId, title, content) SELECT n.noteId, @@ -296,6 +324,17 @@ BEGIN AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.isDeleted = 0 AND n.isProtected = 0; + + INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content) + SELECT + n.noteId, + n.title, + NEW.content + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; END; -- Trigger for DELETE operations @@ -303,6 +342,7 @@ CREATE TRIGGER notes_fts_delete AFTER DELETE ON notes BEGIN DELETE FROM notes_fts WHERE noteId = OLD.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = OLD.noteId; END; -- Trigger for soft delete (isDeleted = 1) @@ -311,6 +351,7 @@ AFTER UPDATE ON notes WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1 BEGIN DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; END; -- Trigger for notes becoming protected @@ -320,6 +361,7 @@ AFTER UPDATE ON notes WHEN OLD.isProtected = 0 AND NEW.isProtected = 1 BEGIN DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; END; -- Trigger for notes becoming unprotected @@ -331,6 +373,7 @@ WHEN OLD.isProtected = 1 AND NEW.isProtected = 0 AND NEW.isDeleted = 0 BEGIN DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; INSERT INTO notes_fts (noteId, title, content) SELECT @@ -339,6 +382,14 @@ BEGIN COALESCE(b.content, '') FROM (SELECT NEW.noteId) AS note_select LEFT JOIN blobs b ON b.blobId = NEW.blobId; + + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + COALESCE(b.content, '') + FROM (SELECT NEW.noteId) AS note_select + LEFT JOIN blobs b ON b.blobId = NEW.blobId; END; -- Trigger for INSERT operations on blobs @@ -347,9 +398,7 @@ END; CREATE TRIGGER notes_fts_blob_insert AFTER INSERT ON blobs BEGIN - -- Use INSERT OR REPLACE to handle both new and existing FTS entries - -- This is crucial for blob deduplication where multiple notes may already - -- exist that reference this blob before the blob itself is created + -- Update both FTS tables for all notes that reference this blob INSERT OR REPLACE INTO notes_fts (noteId, title, content) SELECT n.noteId, @@ -360,4 +409,15 @@ BEGIN AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.isDeleted = 0 AND n.isProtected = 0; + + INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content) + SELECT + n.noteId, + n.title, + NEW.content + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; END; diff --git a/apps/server/src/migrations/0234__add_fts5_search.ts b/apps/server/src/migrations/0234__add_fts5_search.ts index f6f5c0005..47fbb4e04 100644 --- a/apps/server/src/migrations/0234__add_fts5_search.ts +++ b/apps/server/src/migrations/0234__add_fts5_search.ts @@ -18,20 +18,33 @@ export default function addFTS5SearchAndPerformanceIndexes() { // Part 1: FTS5 Setup log.info("Creating FTS5 virtual table for full-text search..."); - // Create FTS5 virtual table - // We store noteId, title, and content for searching - // The 'tokenize' option uses porter stemming for better search results + // Create FTS5 virtual tables + // We create two FTS tables for different search strategies: + // 1. notes_fts: Uses porter stemming for word-based searches + // 2. notes_fts_trigram: Uses trigram tokenizer for substring searches + sql.executeScript(` - -- Drop existing FTS table if it exists (for re-running migration in dev) + -- Drop existing FTS tables if they exist (for re-running migration in dev) DROP TABLE IF EXISTS notes_fts; + DROP TABLE IF EXISTS notes_fts_trigram; - -- Create FTS5 virtual table + -- Create FTS5 virtual table with porter stemming for word-based searches CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5( noteId UNINDEXED, title, content, tokenize = 'porter unicode61' ); + + -- Create FTS5 virtual table with trigram tokenizer for substring searches + -- detail='none' reduces storage by ~50% since we don't need snippets for substring search + CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts_trigram USING fts5( + noteId UNINDEXED, + title, + content, + tokenize = 'trigram', + detail = 'none' + ); `); log.info("Populating FTS5 table with existing note content..."); @@ -78,10 +91,19 @@ export default function addFTS5SearchAndPerformanceIndexes() { // For HTML content, we'll strip tags in the search service // For now, just insert the raw content + + // Insert into porter FTS for word-based searches sql.execute(` INSERT INTO notes_fts (noteId, title, content) VALUES (?, ?, ?) `, [note.noteId, note.title, processedContent]); + + // Also insert into trigram FTS for substring searches + sql.execute(` + INSERT INTO notes_fts_trigram (noteId, title, content) + VALUES (?, ?, ?) + `, [note.noteId, note.title, processedContent]); + processedCount++; } } @@ -131,10 +153,11 @@ export default function addFTS5SearchAndPerformanceIndexes() { AND NEW.isDeleted = 0 AND NEW.isProtected = 0 BEGIN - -- First delete any existing FTS entry (in case of INSERT OR REPLACE) + -- First delete any existing FTS entries (in case of INSERT OR REPLACE) DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - -- Then insert the new entry, using LEFT JOIN to handle missing blobs + -- Then insert the new entry into both FTS tables, using LEFT JOIN to handle missing blobs INSERT INTO notes_fts (noteId, title, content) SELECT NEW.noteId, @@ -142,6 +165,14 @@ export default function addFTS5SearchAndPerformanceIndexes() { COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet FROM (SELECT NEW.noteId) AS note_select LEFT JOIN blobs b ON b.blobId = NEW.blobId; + + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + COALESCE(b.content, '') + FROM (SELECT NEW.noteId) AS note_select + LEFT JOIN blobs b ON b.blobId = NEW.blobId; END `); @@ -153,10 +184,11 @@ export default function addFTS5SearchAndPerformanceIndexes() { WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') -- Fire on any change, not just specific columns, to handle all upsert scenarios BEGIN - -- Always delete the old entry + -- Always delete the old entries from both FTS tables DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - -- Insert new entry if note is not deleted and not protected + -- Insert new entry into both FTS tables if note is not deleted and not protected INSERT INTO notes_fts (noteId, title, content) SELECT NEW.noteId, @@ -166,6 +198,16 @@ export default function addFTS5SearchAndPerformanceIndexes() { LEFT JOIN blobs b ON b.blobId = NEW.blobId WHERE NEW.isDeleted = 0 AND NEW.isProtected = 0; + + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + COALESCE(b.content, '') + FROM (SELECT NEW.noteId) AS note_select + LEFT JOIN blobs b ON b.blobId = NEW.blobId + WHERE NEW.isDeleted = 0 + AND NEW.isProtected = 0; END `); @@ -175,6 +217,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { AFTER DELETE ON notes BEGIN DELETE FROM notes_fts WHERE noteId = OLD.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = OLD.noteId; END `); @@ -185,6 +228,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1 BEGIN DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; END `); @@ -195,6 +239,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { WHEN OLD.isProtected = 0 AND NEW.isProtected = 1 BEGIN DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; END `); @@ -207,6 +252,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { AND NEW.isDeleted = 0 BEGIN DELETE FROM notes_fts WHERE noteId = NEW.noteId; + DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; INSERT INTO notes_fts (noteId, title, content) SELECT @@ -215,6 +261,14 @@ export default function addFTS5SearchAndPerformanceIndexes() { COALESCE(b.content, '') FROM (SELECT NEW.noteId) AS note_select LEFT JOIN blobs b ON b.blobId = NEW.blobId; + + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + COALESCE(b.content, '') + FROM (SELECT NEW.noteId) AS note_select + LEFT JOIN blobs b ON b.blobId = NEW.blobId; END `); @@ -224,7 +278,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { CREATE TRIGGER notes_fts_blob_insert AFTER INSERT ON blobs BEGIN - -- Use INSERT OR REPLACE for atomic update + -- Use INSERT OR REPLACE for atomic update in both FTS tables -- This handles the case where FTS entries may already exist INSERT OR REPLACE INTO notes_fts (noteId, title, content) SELECT @@ -236,6 +290,17 @@ export default function addFTS5SearchAndPerformanceIndexes() { AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.isDeleted = 0 AND n.isProtected = 0; + + INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content) + SELECT + n.noteId, + n.title, + NEW.content + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; END `); @@ -245,7 +310,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { CREATE TRIGGER notes_fts_blob_update AFTER UPDATE ON blobs BEGIN - -- Use INSERT OR REPLACE for atomic update + -- Use INSERT OR REPLACE for atomic update in both FTS tables INSERT OR REPLACE INTO notes_fts (noteId, title, content) SELECT n.noteId, @@ -256,17 +321,28 @@ export default function addFTS5SearchAndPerformanceIndexes() { AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.isDeleted = 0 AND n.isProtected = 0; + + INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content) + SELECT + n.noteId, + n.title, + NEW.content + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; END `); log.info("FTS5 setup completed successfully"); - // Final cleanup: ensure all eligible notes are indexed + // Final cleanup: ensure all eligible notes are indexed in both FTS tables // This catches any edge cases where notes might have been missed log.info("Running final FTS index cleanup..."); - // First check for missing notes - const missingCount = sql.getValue(` + // Check and fix porter FTS table + const missingPorterCount = sql.getValue(` SELECT COUNT(*) FROM notes n LEFT JOIN blobs b ON n.blobId = b.blobId WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') @@ -276,8 +352,7 @@ export default function addFTS5SearchAndPerformanceIndexes() { AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) `) || 0; - if (missingCount > 0) { - // Insert missing notes + if (missingPorterCount > 0) { sql.execute(` WITH missing_notes AS ( SELECT n.noteId, n.title, b.content @@ -292,12 +367,36 @@ export default function addFTS5SearchAndPerformanceIndexes() { INSERT INTO notes_fts (noteId, title, content) SELECT noteId, title, content FROM missing_notes `); + log.info(`Indexed ${missingPorterCount} additional notes in porter FTS during cleanup`); } - const cleanupCount = missingCount; + // Check and fix trigram FTS table + const missingTrigramCount = sql.getValue(` + SELECT COUNT(*) FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) + `) || 0; - if (cleanupCount && cleanupCount > 0) { - log.info(`Indexed ${cleanupCount} additional notes during cleanup`); + if (missingTrigramCount > 0) { + sql.execute(` + WITH missing_notes AS ( + SELECT n.noteId, n.title, b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT noteId, title, content FROM missing_notes + `); + log.info(`Indexed ${missingTrigramCount} additional notes in trigram FTS during cleanup`); } // ======================================== diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts index 85ede0c54..c836d9ac3 100644 --- a/apps/server/src/services/search/expressions/note_content_fulltext.ts +++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts @@ -116,10 +116,13 @@ class NoteContentFulltextExp extends Expression { // For quick-search, also run traditional search for comparison if (isQuickSearch) { const traditionalStartTime = Date.now(); - const traditionalNoteSet = new NoteSet(); - // Run traditional search (use the fallback method) - const traditionalResults = this.executeWithFallback(inputNoteSet, traditionalNoteSet, searchContext); + // Log the input set size for debugging + log.info(`[QUICK-SEARCH-COMPARISON] Input set size: ${inputNoteSet.notes.length} notes`); + + // Run traditional search for comparison + // Use the dedicated comparison method that always runs the full search + const traditionalResults = this.executeTraditionalSearch(inputNoteSet, searchContext); const traditionalEndTime = Date.now(); const traditionalTime = traditionalEndTime - traditionalStartTime; @@ -254,6 +257,25 @@ class NoteContentFulltextExp extends Expression { } return resultNoteSet; } + + /** + * Executes traditional search for comparison purposes + * This always runs the full traditional search regardless of operator + */ + private executeTraditionalSearch(inputNoteSet: NoteSet, searchContext: SearchContext): NoteSet { + const resultNoteSet = new NoteSet(); + + for (const row of sql.iterateRows(` + SELECT noteId, type, mime, content, isProtected + FROM notes JOIN blobs USING (blobId) + WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND isDeleted = 0 + AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) { + this.findInText(row, inputNoteSet, resultNoteSet); + } + + return resultNoteSet; + } findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) { if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) { diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts index 82031953f..96474a93d 100644 --- a/apps/server/src/services/search/fts_search.ts +++ b/apps/server/src/services/search/fts_search.ts @@ -92,18 +92,25 @@ class FTSSearchService { } try { - // Check if FTS5 module is available - const result = sql.getValue(` + // Check if both FTS5 tables are available + const porterTableExists = sql.getValue(` SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'notes_fts' `); - this.isFTS5Available = result > 0; + const trigramTableExists = sql.getValue(` + SELECT COUNT(*) + FROM sqlite_master + WHERE type = 'table' + AND name = 'notes_fts_trigram' + `); + + this.isFTS5Available = porterTableExists > 0 && trigramTableExists > 0; if (!this.isFTS5Available) { - log.info("FTS5 table not found. Full-text search will use fallback implementation."); + log.info("FTS5 tables not found. Full-text search will use fallback implementation."); } } catch (error) { log.error(`Error checking FTS5 availability: ${error}`); @@ -135,6 +142,9 @@ class FTSSearchService { return `"${sanitizedTokens.join(" ")}"`; case "*=*": // Contains all tokens (AND) + // For substring matching, we'll use the trigram table + // which is designed for substring searches + // The trigram tokenizer will handle the substring matching return sanitizedTokens.join(" AND "); case "*=": // Ends with @@ -206,7 +216,7 @@ class FTSSearchService { throw new FTSNotAvailableError(); } - const { + let { limit = FTS_CONFIG.DEFAULT_LIMIT, offset = 0, includeSnippets = true, @@ -214,6 +224,9 @@ class FTSSearchService { highlightTag = FTS_CONFIG.DEFAULT_HIGHLIGHT_START, searchProtected = false } = options; + + // Track if we need post-filtering + let needsPostFiltering = false; try { const ftsQuery = this.convertToFTS5Query(tokens, operator); @@ -235,8 +248,12 @@ class FTSSearchService { return []; } + // Determine which FTS table to use based on operator + // Use trigram table for substring searches (*=* operator) + const ftsTable = operator === '*=*' ? 'notes_fts_trigram' : 'notes_fts'; + // Build the SQL query - let whereConditions = [`notes_fts MATCH ?`]; + let whereConditions = [`${ftsTable} MATCH ?`]; const params: any[] = [ftsQuery]; // Filter by noteIds if provided @@ -247,36 +264,75 @@ class FTSSearchService { // All provided notes are protected, return empty results return []; } - whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); - params.push(...nonProtectedNoteIds); + + // SQLite has a limit on the number of parameters (usually 999 or 32766) + // If we have too many noteIds, we need to handle this differently + const SQLITE_MAX_PARAMS = 900; // Conservative limit to be safe + + if (nonProtectedNoteIds.length > SQLITE_MAX_PARAMS) { + // Too many noteIds to filter in SQL - we'll filter in post-processing + // This is less efficient but avoids the SQL variable limit + log.info(`Too many noteIds for SQL filter (${nonProtectedNoteIds.length}), will filter in post-processing`); + // Don't add the noteId filter to the query + // But we need to get ALL results since we'll filter them + needsPostFiltering = true; + // Set limit to -1 to remove limit entirely + limit = -1; // No limit + } else { + whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); + params.push(...nonProtectedNoteIds); + } } // Build snippet extraction if requested + // Note: snippet function uses the table name from the query const snippetSelect = includeSnippets - ? `, snippet(notes_fts, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', '(query, params); + // Post-process filtering if we had too many noteIds for SQL + if (needsPostFiltering && noteIds && noteIds.size > 0) { + const noteIdSet = new Set(this.filterNonProtectedNoteIds(noteIds)); + results = results.filter(result => noteIdSet.has(result.noteId)); + log.info(`Post-filtered FTS results: ${results.length} results after filtering from ${noteIdSet.size} allowed noteIds`); + } + return results; } catch (error: any) { @@ -305,16 +361,40 @@ class FTSSearchService { */ private filterNonProtectedNoteIds(noteIds: Set): string[] { const noteIdList = Array.from(noteIds); - const placeholders = noteIdList.map(() => '?').join(','); + const BATCH_SIZE = 900; // Conservative limit for SQL parameters - const nonProtectedNotes = sql.getColumn(` - SELECT noteId - FROM notes - WHERE noteId IN (${placeholders}) - AND isProtected = 0 - `, noteIdList); - - return nonProtectedNotes; + if (noteIdList.length <= BATCH_SIZE) { + // Small enough to do in one query + const placeholders = noteIdList.map(() => '?').join(','); + + const nonProtectedNotes = sql.getColumn(` + SELECT noteId + FROM notes + WHERE noteId IN (${placeholders}) + AND isProtected = 0 + `, noteIdList); + + return nonProtectedNotes; + } else { + // Process in batches to avoid SQL parameter limit + const nonProtectedNotes: string[] = []; + + for (let i = 0; i < noteIdList.length; i += BATCH_SIZE) { + const batch = noteIdList.slice(i, i + BATCH_SIZE); + const placeholders = batch.map(() => '?').join(','); + + const batchResults = sql.getColumn(` + SELECT noteId + FROM notes + WHERE noteId IN (${placeholders}) + AND isProtected = 0 + `, batch); + + nonProtectedNotes.push(...batchResults); + } + + return nonProtectedNotes; + } } /** @@ -340,15 +420,26 @@ class FTSSearchService { // Build query for protected notes only let whereConditions = [`n.isProtected = 1`, `n.isDeleted = 0`]; const params: any[] = []; + let needPostFilter = false; + let postFilterNoteIds: Set | null = null; if (noteIds && noteIds.size > 0) { const noteIdList = Array.from(noteIds); - whereConditions.push(`n.noteId IN (${noteIdList.map(() => '?').join(',')})`); - params.push(...noteIdList); + const BATCH_SIZE = 900; // Conservative SQL parameter limit + + if (noteIdList.length > BATCH_SIZE) { + // Too many noteIds, we'll filter in post-processing + needPostFilter = true; + postFilterNoteIds = noteIds; + log.info(`Too many noteIds for protected notes SQL filter (${noteIdList.length}), will filter in post-processing`); + } else { + whereConditions.push(`n.noteId IN (${noteIdList.map(() => '?').join(',')})`); + params.push(...noteIdList); + } } // Get protected notes - const protectedNotes = sql.getRows<{ + let protectedNotes = sql.getRows<{ noteId: string; title: string; content: string | null; @@ -360,6 +451,11 @@ class FTSSearchService { AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') LIMIT ? OFFSET ? `, [...params, limit, offset]); + + // Post-filter if needed + if (needPostFilter && postFilterNoteIds) { + protectedNotes = protectedNotes.filter(note => postFilterNoteIds!.has(note.noteId)); + } const results: FTSSearchResult[] = []; @@ -451,14 +547,20 @@ class FTSSearchService { try { sql.transactional(() => { - // Delete existing entry + // Delete existing entries from both FTS tables sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); + sql.execute(`DELETE FROM notes_fts_trigram WHERE noteId = ?`, [noteId]); - // Insert new entry + // Insert new entries into both FTS tables sql.execute(` INSERT INTO notes_fts (noteId, title, content) VALUES (?, ?, ?) `, [noteId, title, content]); + + sql.execute(` + INSERT INTO notes_fts_trigram (noteId, title, content) + VALUES (?, ?, ?) + `, [noteId, title, content]); }); } catch (error) { log.error(`Failed to update FTS index for note ${noteId}: ${error}`); @@ -477,6 +579,7 @@ class FTSSearchService { try { sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); + sql.execute(`DELETE FROM notes_fts_trigram WHERE noteId = ?`, [noteId]); } catch (error) { log.error(`Failed to remove note ${noteId} from FTS index: ${error}`); } @@ -499,34 +602,62 @@ class FTSSearchService { let syncedCount = 0; sql.transactional(() => { - let query: string; - let params: any[] = []; + const BATCH_SIZE = 900; // Conservative SQL parameter limit if (noteIds && noteIds.length > 0) { - // Sync specific notes that are missing from FTS - const placeholders = noteIds.map(() => '?').join(','); - query = ` - WITH missing_notes AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.noteId IN (${placeholders}) - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `; - params = noteIds; + // Process in batches if too many noteIds + for (let i = 0; i < noteIds.length; i += BATCH_SIZE) { + const batch = noteIds.slice(i, i + BATCH_SIZE); + const placeholders = batch.map(() => '?').join(','); + + // Sync to porter FTS table + const queryPorter = ` + WITH missing_notes AS ( + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.noteId IN (${placeholders}) + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts (noteId, title, content) + SELECT noteId, title, content FROM missing_notes + `; + + const resultPorter = sql.execute(queryPorter, batch); + + // Sync to trigram FTS table + const queryTrigram = ` + WITH missing_notes_trigram AS ( + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.noteId IN (${placeholders}) + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT noteId, title, content FROM missing_notes_trigram + `; + + const resultTrigram = sql.execute(queryTrigram, batch); + syncedCount += Math.max(resultPorter.changes, resultTrigram.changes); + } } else { - // Sync all missing notes - query = ` + // Sync all missing notes to porter FTS table + const queryPorter = ` WITH missing_notes AS ( SELECT n.noteId, @@ -543,16 +674,38 @@ class FTSSearchService { INSERT INTO notes_fts (noteId, title, content) SELECT noteId, title, content FROM missing_notes `; + + const resultPorter = sql.execute(queryPorter, []); + + // Sync all missing notes to trigram FTS table + const queryTrigram = ` + WITH missing_notes_trigram AS ( + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT noteId, title, content FROM missing_notes_trigram + `; + + const resultTrigram = sql.execute(queryTrigram, []); + syncedCount = Math.max(resultPorter.changes, resultTrigram.changes); } - const result = sql.execute(query, params); - syncedCount = result.changes; - if (syncedCount > 0) { log.info(`Synced ${syncedCount} missing notes to FTS index`); - // Optimize if we synced a significant number of notes + // Optimize both FTS tables if we synced a significant number of notes if (syncedCount > 100) { sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + sql.execute(`INSERT INTO notes_fts_trigram(notes_fts_trigram) VALUES('optimize')`); } } }); @@ -578,10 +731,11 @@ class FTSSearchService { try { sql.transactional(() => { - // Clear existing index + // Clear existing indexes sql.execute(`DELETE FROM notes_fts`); + sql.execute(`DELETE FROM notes_fts_trigram`); - // Rebuild from notes + // Rebuild both FTS tables from notes sql.execute(` INSERT INTO notes_fts (noteId, title, content) SELECT @@ -594,9 +748,23 @@ class FTSSearchService { AND n.isDeleted = 0 AND n.isProtected = 0 `); + + sql.execute(` + INSERT INTO notes_fts_trigram (noteId, title, content) + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + `); - // Optimize the FTS table + // Optimize both FTS tables sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + sql.execute(`INSERT INTO notes_fts_trigram(notes_fts_trigram) VALUES('optimize')`); }); log.info("FTS5 index rebuild completed"); @@ -626,7 +794,12 @@ class FTSSearchService { } const totalDocuments = sql.getValue(` - SELECT COUNT(*) FROM notes_fts + SELECT COUNT(DISTINCT noteId) + FROM ( + SELECT noteId FROM notes_fts + UNION + SELECT noteId FROM notes_fts_trigram + ) `) || 0; let indexSize = 0; @@ -635,10 +808,12 @@ class FTSSearchService { try { // Try to get index size from dbstat // dbstat is a virtual table that may not be available in all SQLite builds + // Get size for both FTS tables indexSize = sql.getValue(` SELECT SUM(pgsize) FROM dbstat - WHERE name LIKE 'notes_fts%' + WHERE name LIKE 'notes_fts%' + OR name LIKE 'notes_fts_trigram%' `) || 0; dbstatAvailable = true; } catch (error: any) { diff --git a/package.json b/package.json index 049b21810..8dda1a399 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "chore:generate-openapi": "tsx ./scripts/generate-openapi.ts", "chore:update-build-info": "tsx ./scripts/update-build-info.ts", "chore:update-version": "tsx ./scripts/update-version.ts", + "stress-test:native": "DATA_DIR=apps/server/data tsx ./scripts/stress-test-native-simple.ts", "test:all": "pnpm test:parallel && pnpm test:sequential", "test:parallel": "pnpm nx run-many -t test --all --exclude=server,ckeditor5-mermaid,ckeditor5-math --parallel", "test:sequential": "pnpm nx run-many -t test --projects=server,ckeditor5-mermaid,ckeditor5-math --parallel=1", diff --git a/scripts/stress-test-native-simple.ts b/scripts/stress-test-native-simple.ts new file mode 100644 index 000000000..bdfe2b327 --- /dev/null +++ b/scripts/stress-test-native-simple.ts @@ -0,0 +1,370 @@ +#!/usr/bin/env tsx +/** + * Native API Stress Test Utility (Simplified) + * Uses Trilium's native services to create notes without complex dependencies + * + * Usage: DATA_DIR=apps/server/data pnpm tsx scripts/stress-test-native-simple.ts [batch-size] + * + * Example: + * DATA_DIR=apps/server/data pnpm tsx scripts/stress-test-native-simple.ts 10000 + * DATA_DIR=apps/server/data pnpm tsx scripts/stress-test-native-simple.ts 1000 100 + */ + +import Database from 'better-sqlite3'; +import * as path from 'path'; +import * as fs from 'fs'; +import { randomBytes } from 'crypto'; + +const noteCount = parseInt(process.argv[2]); +const batchSize = parseInt(process.argv[3]) || 100; + +if (!noteCount || noteCount < 1) { + console.error(`Please enter number of notes as program parameter.`); + console.error(`Usage: DATA_DIR=apps/server/data pnpm tsx scripts/stress-test-native-simple.ts [batch-size]`); + process.exit(1); +} + +// Set up database path +const DATA_DIR = process.env.DATA_DIR || 'apps/server/data'; +const DB_PATH = path.join(DATA_DIR, 'document.db'); + +if (!fs.existsSync(DB_PATH)) { + console.error(`Database not found at ${DB_PATH}`); + console.error('Please ensure the server has been run at least once to create the database.'); + process.exit(1); +} + +console.log(`\nšŸš€ Trilium Native-Style Stress Test Utility`); +console.log(`============================================`); +console.log(` Notes to create: ${noteCount.toLocaleString()}`); +console.log(` Batch size: ${batchSize.toLocaleString()}`); +console.log(` Database: ${DB_PATH}`); +console.log(`============================================\n`); + +// Open database +const db = new Database(DB_PATH); + +// Enable optimizations +db.pragma('journal_mode = WAL'); +db.pragma('synchronous = NORMAL'); +db.pragma('cache_size = 10000'); +db.pragma('temp_store = MEMORY'); + +// Helper functions that mimic Trilium's ID generation +function newEntityId(prefix: string = ''): string { + return prefix + randomBytes(12).toString('base64').replace(/[+/=]/g, '').substring(0, 12); +} + +function utcNowDateTime(): string { + return new Date().toISOString().replace('T', ' ').replace(/\.\d{3}Z$/, ''); +} + +// Word lists for content generation +const words = [ + 'lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit', + 'sed', 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', + 'magna', 'aliqua', 'enim', 'ad', 'minim', 'veniam', 'quis', 'nostrud' +]; + +const titleTemplates = [ + 'Project ${word1} ${word2}', + 'Meeting Notes: ${word1} ${word2}', + 'TODO: ${word1} ${word2} ${word3}', + 'Research on ${word1} and ${word2}', + 'Analysis of ${word1} ${word2}' +]; + +const attributeNames = [ + 'archived', 'hideInNote', 'readOnly', 'cssClass', 'iconClass', + 'pageSize', 'viewType', 'template', 'widget', 'index', + 'label', 'promoted', 'hideChildrenOverview', 'collapsed' +]; + +const noteTypes = ['text', 'code', 'book', 'render', 'canvas', 'mermaid', 'search']; + +function getRandomWord(): string { + return words[Math.floor(Math.random() * words.length)]; +} + +function capitalize(word: string): string { + return word.charAt(0).toUpperCase() + word.slice(1); +} + +function generateTitle(): string { + const template = titleTemplates[Math.floor(Math.random() * titleTemplates.length)]; + return template + .replace('${word1}', capitalize(getRandomWord())) + .replace('${word2}', capitalize(getRandomWord())) + .replace('${word3}', capitalize(getRandomWord())); +} + +function generateContent(): string { + const paragraphCount = Math.floor(Math.random() * 5) + 1; + const paragraphs = []; + + for (let i = 0; i < paragraphCount; i++) { + const sentenceCount = Math.floor(Math.random() * 5) + 3; + const sentences = []; + + for (let j = 0; j < sentenceCount; j++) { + const wordCount = Math.floor(Math.random() * 15) + 5; + const sentenceWords = []; + + for (let k = 0; k < wordCount; k++) { + sentenceWords.push(getRandomWord()); + } + + sentenceWords[0] = capitalize(sentenceWords[0]); + sentences.push(sentenceWords.join(' ') + '.'); + } + + paragraphs.push(`

${sentences.join(' ')}

`); + } + + return paragraphs.join('\n'); +} + +// Native-style service functions +function createNote(params: { + noteId: string; + title: string; + content: string; + type: string; + mime?: string; + isProtected?: boolean; + parentNoteId?: string; +}) { + const currentDateTime = utcNowDateTime(); + const noteStmt = db.prepare(` + INSERT INTO notes (noteId, title, isProtected, type, mime, blobId, isDeleted, deleteId, + dateCreated, dateModified, utcDateCreated, utcDateModified) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + + const blobStmt = db.prepare(` + INSERT INTO blobs (blobId, content, dateModified, utcDateModified) + VALUES (?, ?, ?, ?) + `); + + const branchStmt = db.prepare(` + INSERT INTO branches (branchId, noteId, parentNoteId, notePosition, prefix, + isExpanded, isDeleted, deleteId, utcDateModified) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + + // Create blob + const blobId = newEntityId(); + blobStmt.run( + blobId, + Buffer.from(params.content, 'utf-8'), + currentDateTime, + currentDateTime + ); + + // Create note + noteStmt.run( + params.noteId, + params.title, + params.isProtected ? 1 : 0, + params.type, + params.mime || (params.type === 'code' ? 'text/plain' : 'text/html'), + blobId, + 0, + null, + currentDateTime, + currentDateTime, + currentDateTime, + currentDateTime + ); + + // Create branch if parent specified + if (params.parentNoteId) { + branchStmt.run( + newEntityId(), + params.noteId, + params.parentNoteId, + Math.floor(Math.random() * 1000), + null, + 0, + 0, + null, + currentDateTime + ); + } + + return params.noteId; +} + +function createAttribute(params: { + noteId: string; + type: 'label' | 'relation'; + name: string; + value: string; + isInheritable?: boolean; +}) { + const currentDateTime = utcNowDateTime(); + const stmt = db.prepare(` + INSERT INTO attributes (attributeId, noteId, type, name, value, position, + utcDateModified, isDeleted, deleteId, isInheritable) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + + stmt.run( + newEntityId(), + params.noteId, + params.type, + params.name, + params.value, + 0, + currentDateTime, + 0, + null, + params.isInheritable ? 1 : 0 + ); +} + +async function main() { + const startTime = Date.now(); + const allNoteIds: string[] = ['root']; + let notesCreated = 0; + let attributesCreated = 0; + + console.log('Starting note generation...\n'); + + // Create container note + const containerNoteId = newEntityId(); + const containerTransaction = db.transaction(() => { + createNote({ + noteId: containerNoteId, + title: `Stress Test ${new Date().toISOString()}`, + content: `

Container for stress test with ${noteCount} notes

`, + type: 'text', + parentNoteId: 'root' + }); + }); + containerTransaction(); + + console.log(`Created container note: ${containerNoteId}`); + allNoteIds.push(containerNoteId); + + // Process in batches + for (let batch = 0; batch < Math.ceil(noteCount / batchSize); batch++) { + const batchStart = batch * batchSize; + const batchEnd = Math.min(batchStart + batchSize, noteCount); + const batchNoteCount = batchEnd - batchStart; + + const batchTransaction = db.transaction(() => { + for (let i = 0; i < batchNoteCount; i++) { + const noteId = newEntityId(); + const type = noteTypes[Math.floor(Math.random() * noteTypes.length)]; + + // Decide parent - either container or random existing note + let parentNoteId = containerNoteId; + if (allNoteIds.length > 10 && Math.random() < 0.3) { + parentNoteId = allNoteIds[Math.floor(Math.random() * Math.min(allNoteIds.length, 100))]; + } + + // Create note + createNote({ + noteId, + title: generateTitle(), + content: generateContent(), + type, + parentNoteId, + isProtected: Math.random() < 0.05 + }); + + notesCreated++; + allNoteIds.push(noteId); + + // Add attributes + const attributeCount = Math.floor(Math.random() * 5); + for (let a = 0; a < attributeCount; a++) { + const attrType = Math.random() < 0.7 ? 'label' : 'relation'; + const attrName = attributeNames[Math.floor(Math.random() * attributeNames.length)]; + + try { + createAttribute({ + noteId, + type: attrType, + name: attrName, + value: attrType === 'relation' + ? allNoteIds[Math.floor(Math.random() * Math.min(allNoteIds.length, 50))] + : getRandomWord(), + isInheritable: Math.random() < 0.2 + }); + attributesCreated++; + } catch (e) { + // Ignore duplicate errors + } + } + + // Keep memory in check + if (allNoteIds.length > 500) { + allNoteIds.splice(1, allNoteIds.length - 500); + } + } + }); + + batchTransaction(); + + const progress = Math.round(((batch + 1) / Math.ceil(noteCount / batchSize)) * 100); + const elapsed = (Date.now() - startTime) / 1000; + const rate = Math.round(notesCreated / elapsed); + + console.log(`Progress: ${progress}% | Notes: ${notesCreated}/${noteCount} | Rate: ${rate}/sec | Attributes: ${attributesCreated}`); + } + + // Add entity changes + console.log('\nAdding entity changes...'); + const entityTransaction = db.transaction(() => { + const stmt = db.prepare(` + INSERT OR REPLACE INTO entity_changes + (entityName, entityId, hash, isErased, changeId, componentId, instanceId, isSynced, utcDateChanged) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + + for (let i = 0; i < Math.min(100, allNoteIds.length); i++) { + stmt.run( + 'notes', + allNoteIds[i], + randomBytes(16).toString('hex'), + 0, + newEntityId(), + 'stress_test', + 'stress_test_instance', + 1, + utcNowDateTime() + ); + } + }); + entityTransaction(); + + const endTime = Date.now(); + const duration = (endTime - startTime) / 1000; + + // Get statistics + const stats = { + notes: db.prepare('SELECT COUNT(*) as count FROM notes').get() as any, + branches: db.prepare('SELECT COUNT(*) as count FROM branches').get() as any, + attributes: db.prepare('SELECT COUNT(*) as count FROM attributes').get() as any, + blobs: db.prepare('SELECT COUNT(*) as count FROM blobs').get() as any + }; + + console.log('\nāœ… Native-style stress test completed successfully!\n'); + console.log('Database Statistics:'); + console.log(` • Total notes: ${stats.notes.count.toLocaleString()}`); + console.log(` • Total branches: ${stats.branches.count.toLocaleString()}`); + console.log(` • Total attributes: ${stats.attributes.count.toLocaleString()}`); + console.log(` • Total blobs: ${stats.blobs.count.toLocaleString()}`); + console.log(` • Time taken: ${duration.toFixed(2)} seconds`); + console.log(` • Average rate: ${Math.round(noteCount / duration).toLocaleString()} notes/second`); + console.log(` • Container note ID: ${containerNoteId}\n`); + + db.close(); +} + +main().catch((error) => { + console.error('Error:', error); + process.exit(1); +}); \ No newline at end of file diff --git a/scripts/stress-test-native.ts b/scripts/stress-test-native.ts new file mode 100644 index 000000000..d901c4f47 --- /dev/null +++ b/scripts/stress-test-native.ts @@ -0,0 +1,421 @@ +#!/usr/bin/env tsx +/** + * Native API Stress Test Utility + * Uses Trilium's native services to create notes instead of direct DB access + * + * Usage: + * cd apps/server && NODE_ENV=development pnpm tsx ../../scripts/stress-test-native.ts [batch-size] + * + * Example: + * cd apps/server && NODE_ENV=development pnpm tsx ../../scripts/stress-test-native.ts 10000 # Create 10,000 notes + * cd apps/server && NODE_ENV=development pnpm tsx ../../scripts/stress-test-native.ts 1000 100 # Create 1,000 notes in batches of 100 + */ + +// Set up environment +process.env.NODE_ENV = process.env.NODE_ENV || 'development'; +process.env.DATA_DIR = process.env.DATA_DIR || './data'; + +import './src/becca/entity_constructor.js'; +import sqlInit from './src/services/sql_init.js'; +import noteService from './src/services/notes.js'; +import attributeService from './src/services/attributes.js'; +import cls from './src/services/cls.js'; +import cloningService from './src/services/cloning.js'; +import sql from './src/services/sql.js'; +import becca from './src/becca/becca.js'; +import entityChangesService from './src/services/entity_changes.js'; +import type BNote from './src/becca/entities/bnote.js'; + +const noteCount = parseInt(process.argv[2]); +const batchSize = parseInt(process.argv[3]) || 100; + +if (!noteCount || noteCount < 1) { + console.error(`Please enter number of notes as program parameter.`); + console.error(`Usage: cd apps/server && NODE_ENV=development pnpm tsx ../../scripts/stress-test-native.ts [batch-size]`); + process.exit(1); +} + +console.log(`\nšŸš€ Trilium Native API Stress Test Utility`); +console.log(`==========================================`); +console.log(` Notes to create: ${noteCount.toLocaleString()}`); +console.log(` Batch size: ${batchSize.toLocaleString()}`); +console.log(` Using native Trilium services`); +console.log(`==========================================\n`); + +// Word lists for generating content +const words = [ + 'lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit', + 'sed', 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', + 'magna', 'aliqua', 'enim', 'ad', 'minim', 'veniam', 'quis', 'nostrud', + 'exercitation', 'ullamco', 'laboris', 'nisi', 'aliquip', 'ex', 'ea', 'commodo', + 'consequat', 'duis', 'aute', 'irure', 'in', 'reprehenderit', 'voluptate', + 'velit', 'esse', 'cillum', 'fugiat', 'nulla', 'pariatur', 'excepteur', 'sint', + 'occaecat', 'cupidatat', 'non', 'proident', 'sunt', 'culpa', 'qui', 'officia', + 'deserunt', 'mollit', 'anim', 'id', 'est', 'laborum', 'perspiciatis', 'unde', + 'omnis', 'iste', 'natus', 'error', 'voluptatem', 'accusantium', 'doloremque' +]; + +const titleTemplates = [ + 'Project ${word1} ${word2}', + 'Meeting Notes: ${word1} ${word2}', + 'TODO: ${word1} ${word2} ${word3}', + 'Research on ${word1} and ${word2}', + 'Analysis of ${word1} ${word2}', + 'Guide to ${word1} ${word2}', + 'Notes about ${word1}', + '${word1} ${word2} Documentation', + 'Summary: ${word1} ${word2} ${word3}', + 'Report on ${word1} ${word2}', + 'Task: ${word1} Implementation', + 'Review of ${word1} ${word2}' +]; + +const attributeNames = [ + 'archived', 'hideInNote', 'readOnly', 'cssClass', 'iconClass', + 'pageSize', 'viewType', 'template', 'widget', 'index', + 'label', 'promoted', 'hideChildrenOverview', 'collapsed', + 'sortDirection', 'color', 'weight', 'fontSize', 'fontFamily', + 'priority', 'status', 'category', 'tag', 'milestone' +]; + +const noteTypes = ['text', 'code', 'book', 'render', 'canvas', 'mermaid', 'search', 'relationMap']; + +function getRandomWord(): string { + return words[Math.floor(Math.random() * words.length)]; +} + +function capitalize(word: string): string { + return word.charAt(0).toUpperCase() + word.slice(1); +} + +function generateTitle(): string { + const template = titleTemplates[Math.floor(Math.random() * titleTemplates.length)]; + return template + .replace('${word1}', capitalize(getRandomWord())) + .replace('${word2}', capitalize(getRandomWord())) + .replace('${word3}', capitalize(getRandomWord())); +} + +function generateContent(minParagraphs: number = 1, maxParagraphs: number = 10): string { + const paragraphCount = Math.floor(Math.random() * (maxParagraphs - minParagraphs) + minParagraphs); + const paragraphs = []; + + for (let i = 0; i < paragraphCount; i++) { + const sentenceCount = Math.floor(Math.random() * 5) + 3; + const sentences = []; + + for (let j = 0; j < sentenceCount; j++) { + const wordCount = Math.floor(Math.random() * 15) + 5; + const sentenceWords = []; + + for (let k = 0; k < wordCount; k++) { + sentenceWords.push(getRandomWord()); + } + + sentenceWords[0] = capitalize(sentenceWords[0]); + sentences.push(sentenceWords.join(' ') + '.'); + } + + paragraphs.push(`

${sentences.join(' ')}

`); + } + + return paragraphs.join('\n'); +} + +function generateCodeContent(): string { + const templates = [ + `function ${getRandomWord()}() {\n // ${generateSentence()}\n return ${Math.random() > 0.5 ? 'true' : 'false'};\n}`, + `const ${getRandomWord()} = {\n ${getRandomWord()}: "${getRandomWord()}",\n ${getRandomWord()}: ${Math.floor(Math.random() * 1000)}\n};`, + `class ${capitalize(getRandomWord())} {\n constructor() {\n this.${getRandomWord()} = "${getRandomWord()}";\n }\n + ${getRandomWord()}() {\n return this.${getRandomWord()};\n }\n}`, + `SELECT * FROM ${getRandomWord()} WHERE ${getRandomWord()} = '${getRandomWord()}';`, + `#!/bin/bash\n# ${generateSentence()}\necho "${generateSentence()}"\n${getRandomWord()}="${getRandomWord()}"\nexport ${getRandomWord().toUpperCase()}`, + `import { ${getRandomWord()} } from './${getRandomWord()}';\nimport * as ${getRandomWord()} from '${getRandomWord()}';\n\nexport function ${getRandomWord()}() {\n return ${getRandomWord()}();\n}`, + `# ${generateTitle()}\n\n## ${capitalize(getRandomWord())}\n\n${generateSentence()}\n\n\`\`\`python\ndef ${getRandomWord()}():\n return "${getRandomWord()}"\n\`\`\``, + `apiVersion: v1\nkind: ${capitalize(getRandomWord())}\nmetadata:\n name: ${getRandomWord()}\nspec:\n ${getRandomWord()}: ${getRandomWord()}` + ]; + + return templates[Math.floor(Math.random() * templates.length)]; +} + +function generateMermaidContent(): string { + const templates = [ + `graph TD\n A[${capitalize(getRandomWord())}] --> B[${capitalize(getRandomWord())}]\n B --> C[${capitalize(getRandomWord())}]\n C --> D[${capitalize(getRandomWord())}]`, + `sequenceDiagram\n ${capitalize(getRandomWord())}->>+${capitalize(getRandomWord())}: ${generateSentence()}\n ${capitalize(getRandomWord())}-->>-${capitalize(getRandomWord())}: ${getRandomWord()}`, + `flowchart LR\n Start --> ${capitalize(getRandomWord())}\n ${capitalize(getRandomWord())} --> ${capitalize(getRandomWord())}\n ${capitalize(getRandomWord())} --> End`, + `classDiagram\n class ${capitalize(getRandomWord())} {\n +${getRandomWord()}()\n -${getRandomWord()}\n }\n class ${capitalize(getRandomWord())} {\n +${getRandomWord()}()\n }` + ]; + + return templates[Math.floor(Math.random() * templates.length)]; +} + +function generateSentence(): string { + const wordCount = Math.floor(Math.random() * 10) + 5; + const wordList = []; + for (let i = 0; i < wordCount; i++) { + wordList.push(getRandomWord()); + } + wordList[0] = capitalize(wordList[0]); + return wordList.join(' '); +} + +async function start() { + const startTime = Date.now(); + const allNotes: BNote[] = []; + let notesCreated = 0; + let attributesCreated = 0; + let clonesCreated = 0; + let revisionsCreated = 0; + + console.log('Starting note generation using native Trilium services...\n'); + + // Find root note + const rootNote = becca.getNote('root'); + if (!rootNote) { + console.error('Root note not found!'); + process.exit(1); + } + + // Create a container note for our stress test + const { note: containerNote } = noteService.createNewNote({ + parentNoteId: 'root', + title: `Stress Test ${new Date().toISOString()}`, + content: `

Container for stress test with ${noteCount} notes

`, + type: 'text', + isProtected: false + }); + + console.log(`Created container note: ${containerNote.title} (${containerNote.noteId})`); + allNotes.push(containerNote); + + // Process in batches for better control + for (let batch = 0; batch < Math.ceil(noteCount / batchSize); batch++) { + const batchStart = batch * batchSize; + const batchEnd = Math.min(batchStart + batchSize, noteCount); + const batchNoteCount = batchEnd - batchStart; + + sql.transactional(() => { + for (let i = 0; i < batchNoteCount; i++) { + const type = noteTypes[Math.floor(Math.random() * noteTypes.length)]; + let content = ''; + let mime = undefined; + + // Generate content based on type + switch (type) { + case 'code': + content = generateCodeContent(); + mime = 'text/plain'; + break; + case 'mermaid': + content = generateMermaidContent(); + mime = 'text/plain'; + break; + case 'canvas': + content = JSON.stringify({ + elements: [], + appState: { viewBackgroundColor: "#ffffff" }, + files: {} + }); + mime = 'application/json'; + break; + case 'search': + content = JSON.stringify({ + searchString: `#${getRandomWord()} OR #${getRandomWord()}` + }); + mime = 'application/json'; + break; + case 'relationMap': + content = JSON.stringify({ + notes: [], + zoom: 1 + }); + mime = 'application/json'; + break; + default: + content = generateContent(); + mime = 'text/html'; + } + + // Decide parent - either container or random existing note for complex hierarchy + let parentNoteId = containerNote.noteId; + if (allNotes.length > 10 && Math.random() < 0.3) { + // 30% chance to attach to random existing note + parentNoteId = allNotes[Math.floor(Math.random() * Math.min(allNotes.length, 100))].noteId; + } + + // Create the note using native service + const { note, branch } = noteService.createNewNote({ + parentNoteId, + title: generateTitle(), + content, + type, + mime, + isProtected: Math.random() < 0.05 // 5% protected notes + }); + + notesCreated++; + allNotes.push(note); + + // Add attributes using native service + const attributeCount = Math.floor(Math.random() * 8); + for (let a = 0; a < attributeCount; a++) { + const attrType = Math.random() < 0.7 ? 'label' : 'relation'; + const attrName = attributeNames[Math.floor(Math.random() * attributeNames.length)]; + + try { + if (attrType === 'label') { + attributeService.createLabel( + note.noteId, + attrName, + Math.random() < 0.5 ? getRandomWord() : '' + ); + attributesCreated++; + } else if (allNotes.length > 1) { + const targetNote = allNotes[Math.floor(Math.random() * Math.min(allNotes.length, 50))]; + attributeService.createRelation( + note.noteId, + attrName, + targetNote.noteId + ); + attributesCreated++; + } + } catch (e) { + // Ignore attribute creation errors (e.g., duplicates) + } + } + + // Update note content occasionally to trigger revisions + if (Math.random() < 0.1) { // 10% chance + note.setContent(content + `\n

Updated at ${new Date().toISOString()}

`); + note.save(); + + // Save revision + if (Math.random() < 0.5) { + note.saveRevision(); + revisionsCreated++; + } + } + + // Create clones occasionally for complex relationships + if (allNotes.length > 20 && Math.random() < 0.05) { // 5% chance + try { + const targetParent = allNotes[Math.floor(Math.random() * allNotes.length)]; + const result = cloningService.cloneNoteToBranch( + note.noteId, + targetParent.noteId, + Math.random() < 0.2 ? 'clone' : '' + ); + if (result.success) { + clonesCreated++; + } + } catch (e) { + // Ignore cloning errors (e.g., circular dependencies) + } + } + + // Add note to recent notes occasionally + if (Math.random() < 0.1) { // 10% chance + try { + sql.execute( + "INSERT OR IGNORE INTO recent_notes (noteId, notePath, utcDateCreated) VALUES (?, ?, ?)", + [note.noteId, note.getBestNotePath()?.path || 'root', note.utcDateCreated] + ); + } catch (e) { + // Table might not exist in all versions + } + } + + // Keep memory usage in check + if (allNotes.length > 500) { + allNotes.splice(0, allNotes.length - 500); + } + } + })(); + + const progress = Math.round(((batch + 1) / Math.ceil(noteCount / batchSize)) * 100); + const elapsed = (Date.now() - startTime) / 1000; + const rate = Math.round(notesCreated / elapsed); + + console.log(`Progress: ${progress}% | Notes: ${notesCreated}/${noteCount} | Rate: ${rate}/sec | Attrs: ${attributesCreated} | Clones: ${clonesCreated} | Revisions: ${revisionsCreated}`); + + // Force entity changes sync + entityChangesService.putNoteReorderingEntityChange(containerNote.noteId); + } + + // Create some advanced structures + console.log('\nCreating advanced relationships...'); + + // Create template notes + const templateNote = noteService.createNewNote({ + parentNoteId: containerNote.noteId, + title: 'Template: ' + generateTitle(), + content: '

This is a template note

', + type: 'text', + isProtected: false + }).note; + + attributeService.createLabel(templateNote.noteId, 'template', ''); + + // Apply template to some notes + for (let i = 0; i < Math.min(10, allNotes.length); i++) { + const targetNote = allNotes[Math.floor(Math.random() * allNotes.length)]; + attributeService.createRelation(targetNote.noteId, 'template', templateNote.noteId); + } + + // Create some CSS notes + const cssNote = noteService.createNewNote({ + parentNoteId: containerNote.noteId, + title: 'Custom CSS', + content: `.custom-class { color: #${Math.floor(Math.random()*16777215).toString(16)}; }`, + type: 'code', + mime: 'text/css', + isProtected: false + }).note; + + attributeService.createLabel(cssNote.noteId, 'appCss', ''); + + // Create widget notes + const widgetNote = noteService.createNewNote({ + parentNoteId: containerNote.noteId, + title: 'Custom Widget', + content: `
Widget content: ${generateSentence()}
`, + type: 'code', + mime: 'text/html', + isProtected: false + }).note; + + attributeService.createLabel(widgetNote.noteId, 'widget', ''); + + const endTime = Date.now(); + const duration = (endTime - startTime) / 1000; + + // Get final statistics + const stats = { + notes: sql.getValue('SELECT COUNT(*) FROM notes'), + branches: sql.getValue('SELECT COUNT(*) FROM branches'), + attributes: sql.getValue('SELECT COUNT(*) FROM attributes'), + revisions: sql.getValue('SELECT COUNT(*) FROM revisions'), + attachments: sql.getValue('SELECT COUNT(*) FROM attachments'), + recentNotes: sql.getValue('SELECT COUNT(*) FROM recent_notes') + }; + + console.log('\nāœ… Native API stress test completed successfully!\n'); + console.log('Database Statistics:'); + console.log(` • Total notes: ${stats.notes?.toLocaleString()}`); + console.log(` • Total branches: ${stats.branches?.toLocaleString()}`); + console.log(` • Total attributes: ${stats.attributes?.toLocaleString()}`); + console.log(` • Total revisions: ${stats.revisions?.toLocaleString()}`); + console.log(` • Total attachments: ${stats.attachments?.toLocaleString()}`); + console.log(` • Recent notes: ${stats.recentNotes?.toLocaleString()}`); + console.log(` • Time taken: ${duration.toFixed(2)} seconds`); + console.log(` • Average rate: ${Math.round(noteCount / duration).toLocaleString()} notes/second`); + console.log(` • Container note ID: ${containerNote.noteId}\n`); + + process.exit(0); +} + +// Initialize database and run stress test +sqlInit.dbReady.then(cls.wrap(start)).catch((err) => { + console.error('Error:', err); + process.exit(1); +}); \ No newline at end of file