Mirror of https://github.com/zadam/trilium.git (synced 2026-03-12 07:00:24 +01:00)

Compare commits: 2 commits, renovate/h… ... feat/searc…

| Author | SHA1 | Date |
|---|---|---|
| | 77733ce205 | |
| | 585b6ccd3e | |
apps/server/spec/search_profiling.spec.ts (new file, 306 lines)
@@ -0,0 +1,306 @@
/**
 * Integration-level search profiling test.
 *
 * Uses the real SQLite database (spec/db/document.db loaded in-memory),
 * real sql module, real becca cache, and the full app stack.
 *
 * Profiles search at large scale (50K+ notes) to match real-world
 * performance reports from users with 240K+ notes.
 */
import { Application } from "express";
import { beforeAll, describe, expect, it } from "vitest";
import config from "../src/services/config.js";

let app: Application;

function timed<T>(fn: () => T): [T, number] {
    const start = performance.now();
    const result = fn();
    return [result, performance.now() - start];
}

function randomId(len = 12): string {
    const chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    let id = "";
    for (let i = 0; i < len; i++) id += chars[Math.floor(Math.random() * chars.length)];
    return id;
}

function randomWord(len = 8): string {
    const chars = "abcdefghijklmnopqrstuvwxyz";
    let w = "";
    for (let i = 0; i < len; i++) w += chars[Math.floor(Math.random() * chars.length)];
    return w;
}

function generateContent(wordCount: number, keyword?: string): string {
    const paragraphs: string[] = [];
    let remaining = wordCount;
    let injected = false;
    while (remaining > 0) {
        const n = Math.min(remaining, 30 + Math.floor(Math.random() * 30));
        const words: string[] = [];
        for (let i = 0; i < n; i++) words.push(randomWord(3 + Math.floor(Math.random() * 10)));
        if (keyword && !injected && remaining < wordCount / 2) {
            words[Math.floor(words.length / 2)] = keyword;
            injected = true;
        }
        paragraphs.push(`<p>${words.join(" ")}</p>`);
        remaining -= n;
    }
    return paragraphs.join("\n");
}

describe("Search profiling (integration)", () => {
    beforeAll(async () => {
        config.General.noAuthentication = true;
        const buildApp = (await import("../src/app.js")).default;
        app = await buildApp();
    });

    it("large-scale profiling (50K notes)", async () => {
        const sql = (await import("../src/services/sql.js")).default;
        const becca = (await import("../src/becca/becca.js")).default;
        const beccaLoader = (await import("../src/becca/becca_loader.js")).default;
        const cls = (await import("../src/services/cls.js")).default;
        const searchService = (await import("../src/services/search/services/search.js")).default;
        const SearchContext = (await import("../src/services/search/search_context.js")).default;
        const beccaService = (await import("../src/becca/becca_service.js")).default;

        await new Promise<void>((resolve) => {
            cls.init(() => {
                const initialNoteCount = Object.keys(becca.notes).length;
                console.log(`\n Initial becca notes: ${initialNoteCount}`);

                // ── Seed 50K notes with hierarchy ──
                // Some folders (depth), some with common keyword "test" in title
                const TOTAL_NOTES = 50000;
                const FOLDER_COUNT = 500; // 500 folders
                const NOTES_PER_FOLDER = (TOTAL_NOTES - FOLDER_COUNT) / FOLDER_COUNT; // ~99 notes per folder
                const MATCH_FRACTION = 0.10; // 10% match "test" — ~5000 notes
                const CONTENT_WORDS = 500;

                const now = new Date().toISOString().replace("T", " ").replace("Z", "+0000");
                console.log(` Seeding ${TOTAL_NOTES} notes (${FOLDER_COUNT} folders, ~${NOTES_PER_FOLDER.toFixed(0)} per folder)...`);

                const [, seedMs] = timed(() => {
                    sql.transactional(() => {
                        const folderIds: string[] = [];

                        // Create folders under root
                        for (let f = 0; f < FOLDER_COUNT; f++) {
                            const noteId = `seed${randomId(8)}`;
                            const branchId = `seed${randomId(8)}`;
                            const blobId = `seed${randomId(16)}`;
                            folderIds.push(noteId);

                            sql.execute(
                                `INSERT INTO blobs (blobId, content, dateModified, utcDateModified) VALUES (?, ?, ?, ?)`,
                                [blobId, `<p>Folder ${f}</p>`, now, now]
                            );
                            sql.execute(
                                `INSERT INTO notes (noteId, title, type, mime, blobId, isProtected, isDeleted,
                                                    dateCreated, dateModified, utcDateCreated, utcDateModified)
                                 VALUES (?, ?, 'text', 'text/html', ?, 0, 0, ?, ?, ?, ?)`,
                                [noteId, `Folder ${f} ${randomWord(5)}`, blobId, now, now, now, now]
                            );
                            sql.execute(
                                `INSERT INTO branches (branchId, noteId, parentNoteId, notePosition, isDeleted, isExpanded, utcDateModified)
                                 VALUES (?, ?, 'root', ?, 0, 0, ?)`,
                                [branchId, noteId, f * 10, now]
                            );
                        }

                        // Create notes under folders
                        let noteIdx = 0;
                        for (let f = 0; f < FOLDER_COUNT; f++) {
                            const parentId = folderIds[f];
                            for (let n = 0; n < NOTES_PER_FOLDER; n++) {
                                const isMatch = noteIdx < TOTAL_NOTES * MATCH_FRACTION;
                                const noteId = `seed${randomId(8)}`;
                                const branchId = `seed${randomId(8)}`;
                                const blobId = `seed${randomId(16)}`;
                                const title = isMatch
                                    ? `Test Document ${noteIdx} ${randomWord(6)}`
                                    : `Note ${noteIdx} ${randomWord(6)} ${randomWord(5)}`;
                                const content = generateContent(CONTENT_WORDS, isMatch ? "test" : undefined);

                                sql.execute(
                                    `INSERT INTO blobs (blobId, content, dateModified, utcDateModified) VALUES (?, ?, ?, ?)`,
                                    [blobId, content, now, now]
                                );
                                sql.execute(
                                    `INSERT INTO notes (noteId, title, type, mime, blobId, isProtected, isDeleted,
                                                        dateCreated, dateModified, utcDateCreated, utcDateModified)
                                     VALUES (?, ?, 'text', 'text/html', ?, 0, 0, ?, ?, ?, ?)`,
                                    [noteId, title, blobId, now, now, now, now]
                                );
                                sql.execute(
                                    `INSERT INTO branches (branchId, noteId, parentNoteId, notePosition, isDeleted, isExpanded, utcDateModified)
                                     VALUES (?, ?, ?, ?, 0, 0, ?)`,
                                    [branchId, noteId, parentId, n * 10, now]
                                );
                                noteIdx++;
                            }
                        }
                    });
                });
                console.log(` SQL seeding: ${seedMs.toFixed(0)}ms`);

                const [, reloadMs] = timed(() => beccaLoader.load());
                const totalNotes = Object.keys(becca.notes).length;
                console.log(` Becca reload: ${reloadMs.toFixed(0)}ms Total notes: ${totalNotes}`);

                // ── Warm caches ──
                searchService.searchNotesForAutocomplete("test", true);

                // ════════════════════════════════════════════
                // PROFILING AT SCALE
                // ════════════════════════════════════════════

                console.log(`\n ════ PROFILING (${totalNotes} notes) ════\n`);

                // 1. getCandidateNotes cost (the full-scan bottleneck)
                const allNotes = Object.values(becca.notes);
                const [, flatScanMs] = timed(() => {
                    let count = 0;
                    for (const note of allNotes) {
                        const ft = note.getFlatText();
                        if (ft.includes("test")) count++;
                    }
                    return count;
                });
                console.log(` getFlatText + includes scan (${allNotes.length} notes): ${flatScanMs.toFixed(1)}ms`);

                // 2. Full findResultsWithQuery (includes candidate scan + parent walk + scoring)
                const findTimes: number[] = [];
                let findResultCount = 0;
                for (let i = 0; i < 3; i++) {
                    const [r, ms] = timed(() =>
                        searchService.findResultsWithQuery("test", new SearchContext({ fastSearch: true }))
                    );
                    findTimes.push(ms);
                    findResultCount = r.length;
                }
                const findAvg = findTimes.reduce((a, b) => a + b, 0) / findTimes.length;
                console.log(` findResultsWithQuery (fast): avg ${findAvg.toFixed(1)}ms (${findResultCount} results)`);

                // 3. Exact-only (no fuzzy)
                const exactTimes: number[] = [];
                for (let i = 0; i < 3; i++) {
                    const [, ms] = timed(() =>
                        searchService.findResultsWithQuery("test", new SearchContext({ fastSearch: true, enableFuzzyMatching: false }))
                    );
                    exactTimes.push(ms);
                }
                const exactAvg = exactTimes.reduce((a, b) => a + b, 0) / exactTimes.length;
                console.log(` findResultsWithQuery (exact): avg ${exactAvg.toFixed(1)}ms`);
                console.log(` Fuzzy overhead: ${(findAvg - exactAvg).toFixed(1)}ms`);

                // 4. SearchResult construction + computeScore cost (isolated)
                const results = searchService.findResultsWithQuery("test", new SearchContext({ fastSearch: true }));
                console.log(` Total results before trim: ${results.length}`);

                const [, scoreAllMs] = timed(() => {
                    for (const r of results) r.computeScore("test", ["test"], true);
                });
                console.log(` computeScore × ${results.length}: ${scoreAllMs.toFixed(1)}ms (${(scoreAllMs / results.length).toFixed(3)}ms/result)`);

                // 5. getNoteTitleForPath for all results
                const [, pathTitleMs] = timed(() => {
                    for (const r of results) beccaService.getNoteTitleForPath(r.notePathArray);
                });
                console.log(` getNoteTitleForPath × ${results.length}: ${pathTitleMs.toFixed(1)}ms`);

                // 6. Content snippet extraction (only 200)
                const trimmed = results.slice(0, 200);
                const [, snippetMs] = timed(() => {
                    for (const r of trimmed) {
                        r.contentSnippet = searchService.extractContentSnippet(r.noteId, ["test"]);
                    }
                });
                console.log(` extractContentSnippet × 200: ${snippetMs.toFixed(1)}ms`);

                // 7. Highlighting (only 200)
                const [, hlMs] = timed(() => {
                    searchService.highlightSearchResults(trimmed, ["test"]);
                });
                console.log(` highlightSearchResults × 200: ${hlMs.toFixed(1)}ms`);

                // 7b. getBestNotePath cost (used by fast path)
                const sampleNotes = Object.values(becca.notes).filter(n => n.title.startsWith("Test Document")).slice(0, 1000);
                const [, bestPathMs] = timed(() => {
                    for (const n of sampleNotes) n.getBestNotePath();
                });
                console.log(` getBestNotePath × ${sampleNotes.length}: ${bestPathMs.toFixed(1)}ms (${(bestPathMs / sampleNotes.length).toFixed(3)}ms/note)`);

                // 8. Full autocomplete end-to-end
                const autoTimes: number[] = [];
                let autoCount = 0;
                for (let i = 0; i < 3; i++) {
                    const [r, ms] = timed(() =>
                        searchService.searchNotesForAutocomplete("test", true)
                    );
                    autoTimes.push(ms);
                    autoCount = r.length;
                }
                const autoAvg = autoTimes.reduce((a, b) => a + b, 0) / autoTimes.length;
                const autoMin = Math.min(...autoTimes);
                console.log(`\n ★ FULL AUTOCOMPLETE: avg ${autoAvg.toFixed(1)}ms min ${autoMin.toFixed(1)}ms (${autoCount} results)`);

                // 9. With a less common search term (fewer matches)
                const rareTimes: number[] = [];
                let rareCount = 0;
                for (let i = 0; i < 3; i++) {
                    const [r, ms] = timed(() =>
                        searchService.searchNotesForAutocomplete("leitfaden", true)
                    );
                    rareTimes.push(ms);
                    rareCount = r.length;
                }
                const rareAvg = rareTimes.reduce((a, b) => a + b, 0) / rareTimes.length;
                console.log(` Autocomplete "leitfaden": avg ${rareAvg.toFixed(1)}ms (${rareCount} results)`);

                // 10. Full search (fastSearch=false) — the 2.7s bottleneck
                console.log(`\n ── Full search (fastSearch=false) ──`);
                const fullTimes: number[] = [];
                let fullCount = 0;
                for (let i = 0; i < 2; i++) {
                    const [r, ms] = timed(() =>
                        searchService.findResultsWithQuery("test", new SearchContext({ fastSearch: false }))
                    );
                    fullTimes.push(ms);
                    fullCount = r.length;
                }
                const fullAvg = fullTimes.reduce((a, b) => a + b, 0) / fullTimes.length;
                console.log(` Full search (flat + SQL): avg ${fullAvg.toFixed(1)}ms (${fullCount} results)`);

                // 11. SQL content scan alone
                const [scanCount, scanMs] = timed(() => {
                    let count = 0;
                    for (const row of sql.iterateRows<{ content: Buffer | string }>(`
                        SELECT noteId, type, mime, content, isProtected
                        FROM notes JOIN blobs USING (blobId)
                        WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                          AND isDeleted = 0
                          AND LENGTH(content) < 2097152`)) {
                        count++;
                    }
                    return count;
                });
                console.log(` Raw SQL scan (${scanCount} rows): ${scanMs.toFixed(1)}ms`);

                // ── Summary ──
                console.log(`\n ════ SUMMARY ════`);
                console.log(` Notes: ${totalNotes} | Matches: ${findResultCount} | Hierarchy depth: 3 (root → folder → note)`);
                console.log(` ──────────────────────────────────`);
                console.log(` Autocomplete (fast): ${autoAvg.toFixed(1)}ms`);
                console.log(` findResults: ${findAvg.toFixed(1)}ms (${((findAvg / autoAvg) * 100).toFixed(0)}%)`);
                console.log(` snippets+highlight: ${(snippetMs + hlMs).toFixed(1)}ms (${(((snippetMs + hlMs) / autoAvg) * 100).toFixed(0)}%)`);
                console.log(` Full search: ${fullAvg.toFixed(1)}ms`);

                resolve();
            });
        });
    }, 600_000);
});
@@ -31,9 +31,17 @@ export default class Becca {
     allNoteSetCache: NoteSet | null;

+    /**
+     * Pre-built parallel arrays for fast flat text scanning in search.
+     * Avoids per-note property access overhead when iterating 50K+ notes.
+     * Dirtied when notes change (along with allNoteSetCache).
+     */
+    flatTextIndex: { notes: BNote[], flatTexts: string[] } | null;
+
     constructor() {
         this.reset();
         this.allNoteSetCache = null;
+        this.flatTextIndex = null;
     }

     reset() {

@@ -239,6 +247,28 @@ export default class Becca {
     /** Should be called when the set of all non-skeleton notes changes (added/removed) */
     dirtyNoteSetCache() {
         this.allNoteSetCache = null;
+        this.flatTextIndex = null;
     }

+    /**
+     * Returns pre-built parallel arrays of notes and their flat texts for fast scanning.
+     * The flat texts are already normalized (lowercase, diacritics removed).
+     */
+    getFlatTextIndex(): { notes: BNote[], flatTexts: string[] } {
+        if (!this.flatTextIndex) {
+            const allNoteSet = this.getAllNoteSet();
+            const notes: BNote[] = [];
+            const flatTexts: string[] = [];
+
+            for (const note of allNoteSet.notes) {
+                notes.push(note);
+                flatTexts.push(note.getFlatText());
+            }
+
+            this.flatTextIndex = { notes, flatTexts };
+        }
+
+        return this.flatTextIndex;
+    }
+
     getAllNoteSet() {
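To see why the parallel arrays pay off, here is a minimal stand-alone sketch (FakeNote and the timing harness are invented for illustration, not Trilium code) contrasting a method-call-per-note scan with a scan over pre-computed strings. The point of the index is that the strings are built once and reused across every keystroke of an autocomplete session:

```ts
// Hypothetical micro-benchmark: method call per note vs. pre-built parallel array.
// FakeNote stands in for BNote; absolute numbers vary by machine and engine.
class FakeNote {
    constructor(public title: string) {}
    getFlatText(): string {
        // The real BNote.getFlatText() concatenates noteId, title, and attributes;
        // here we just lowercase the title to keep the sketch small.
        return this.title.toLowerCase();
    }
}

const notes = Array.from({ length: 50_000 }, (_, i) => new FakeNote(`Note ${i} lorem ipsum`));

// Per-note method call (the old getCandidateNotes pattern):
let t0 = performance.now();
let hits = 0;
for (const n of notes) {
    if (n.getFlatText().includes("test")) hits++;
}
console.log(`per-note calls: ${(performance.now() - t0).toFixed(1)}ms`);

// Pre-built parallel array (the getFlatTextIndex pattern): strings are computed
// once up front, so the hot loop is a pure string scan.
const flatTexts = notes.map(n => n.getFlatText());
t0 = performance.now();
hits = 0;
for (const ft of flatTexts) {
    if (ft.includes("test")) hits++;
}
console.log(`parallel array: ${(performance.now() - t0).toFixed(1)}ms`);
```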
@@ -790,6 +790,9 @@ class BNote extends AbstractBeccaEntity<BNote> {
         this.__attributeCache = null;
         this.__inheritableAttributeCache = null;
         this.__ancestorCache = null;
+
+        // Dirty the becca-level flat text index since this note's flat text may have changed
+        this.becca.flatTextIndex = null;
     }

     invalidateSubTree(path: string[] = []) {
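The invalidation contract between this hunk and getFlatTextIndex() above is easiest to see in a toy model. The classes below are simplified stand-ins for becca and BNote (not the real entities): a note change drops the index, and the next lookup rebuilds it lazily.

```ts
// Toy model of the lazy-rebuild contract; names mirror the diff, types simplified.
class ToyBecca {
    notes: ToyNote[] = [];
    flatTextIndex: { notes: ToyNote[]; flatTexts: string[] } | null = null;

    getFlatTextIndex() {
        if (!this.flatTextIndex) {
            // Rebuilt lazily on first use after any invalidation
            const notes = [...this.notes];
            this.flatTextIndex = { notes, flatTexts: notes.map(n => n.getFlatText()) };
        }
        return this.flatTextIndex;
    }
}

class ToyNote {
    constructor(private becca: ToyBecca, public title: string) {
        becca.notes.push(this);
    }
    getFlatText() { return this.title.toLowerCase(); }
    invalidateThisCache() {
        // Mirrors the line added in this hunk: a title/attribute change may alter
        // the flat text, so drop the becca-level index and let it rebuild lazily.
        this.becca.flatTextIndex = null;
    }
}

const b = new ToyBecca();
const n = new ToyNote(b, "Old title");
console.log(b.getFlatTextIndex().flatTexts); // ["old title"]
n.title = "New title";
n.invalidateThisCache();
console.log(b.getFlatTextIndex().flatTexts); // ["new title"], rebuilt on demand
```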
@@ -99,6 +99,22 @@ class NoteFlatTextExp extends Expression {
         const candidateNotes = this.getCandidateNotes(inputNoteSet, searchContext);

+        // Fast path for single-token searches with a limit (e.g. autocomplete):
+        // Skip the expensive recursive parent walk and just use getBestNotePath().
+        // The flat text already matched, so we know the token is present.
+        if (this.tokens.length === 1 && searchContext.limit) {
+            for (const note of candidateNotes) {
+                if (!resultNoteSet.hasNoteId(note.noteId)) {
+                    const notePath = note.getBestNotePath();
+                    if (notePath) {
+                        executionContext.noteIdToNotePath[note.noteId] = notePath;
+                        resultNoteSet.add(note);
+                    }
+                }
+            }
+            return resultNoteSet;
+        }
+
         for (const note of candidateNotes) {
             // autocomplete should be able to find notes by their noteIds as well (only leafs)
             if (this.tokens.length === 1 && note.noteId.toLowerCase() === this.tokens[0]) {
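Which path runs is decided purely by the token count and the presence of a limit on the context. A small sketch of the guard (types are simplified stand-ins; only the `limit: 200` value, set in the searchNotesForAutocomplete hunk further down, comes from this PR):

```ts
// Illustrative only: the path-selection guard added in this hunk.
interface Ctx { limit?: number; }

function chooseStrategy(tokens: string[], ctx: Ctx): "bestNotePath" | "recursiveWalk" {
    // Single token + limited search takes the cheap getBestNotePath() route;
    // everything else keeps the original recursive parent walk.
    return tokens.length === 1 && ctx.limit ? "bestNotePath" : "recursiveWalk";
}

console.log(chooseStrategy(["test"], { limit: 200 }));        // "bestNotePath"
console.log(chooseStrategy(["test", "doc"], { limit: 200 })); // "recursiveWalk"
console.log(chooseStrategy(["test"], {}));                    // "recursiveWalk"
```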
@@ -112,7 +128,7 @@ class NoteFlatTextExp extends Expression {
         // Add defensive checks for undefined properties
         const typeMatches = note.type && note.type.includes(token);
         const mimeMatches = note.mime && note.mime.includes(token);

         if (typeMatches || mimeMatches) {
             foundAttrTokens.push(token);
         }
@@ -165,14 +181,38 @@ class NoteFlatTextExp extends Expression {
     getCandidateNotes(noteSet: NoteSet, searchContext?: SearchContext): BNote[] {
         const candidateNotes: BNote[] = [];

-        for (const note of noteSet.notes) {
-            const normalizedFlatText = normalizeSearchText(note.getFlatText());
+        // For limited searches (e.g. autocomplete), cap candidates to avoid
+        // processing thousands of matches when only a few hundred are needed.
+        // Use 5x the limit to ensure enough quality candidates for scoring.
+        const maxCandidates = searchContext?.limit ? searchContext.limit * 5 : Infinity;
+
+        // Use the pre-built flat text index for fast scanning.
+        // This provides pre-computed flat texts in a parallel array, avoiding
+        // per-note property access overhead at large scale (50K+ notes).
+        const { notes: indexNotes, flatTexts } = becca.getFlatTextIndex();
+
+        // Build a set for quick membership check when noteSet isn't the full set
+        const isFullSet = noteSet.notes.length === indexNotes.length;
+
+        for (let i = 0; i < indexNotes.length; i++) {
+            const note = indexNotes[i];
+
+            // Skip notes not in the input set (only check when not using the full set)
+            if (!isFullSet && !noteSet.hasNoteId(note.noteId)) {
+                continue;
+            }
+
+            const flatText = flatTexts[i];
             for (const token of this.tokens) {
-                if (this.smartMatch(normalizedFlatText, token, searchContext)) {
+                if (this.smartMatch(flatText, token, searchContext)) {
                     candidateNotes.push(note);
                     break;
                 }
             }
+
+            if (candidateNotes.length >= maxCandidates) {
+                break;
+            }
         }

         return candidateNotes;
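With autocomplete's limit of 200 (set later in this PR), maxCandidates works out to 5 × 200 = 1000, so the scan stops after the first thousand matching notes instead of collecting every match. A self-contained sketch of the cap behavior (stand-in data, not the real NoteSet):

```ts
// Sketch of the early-exit cap from this hunk, over a plain string array.
function capDemo(flatTexts: string[], token: string, limit?: number): number {
    const maxCandidates = limit ? limit * 5 : Infinity;
    let candidates = 0;
    for (const ft of flatTexts) {
        if (ft.includes(token)) {
            candidates++;
            if (candidates >= maxCandidates) break; // stop scanning early
        }
    }
    return candidates;
}

// Every 10th entry matches "test", so 5000 of 50000 match overall.
const texts = Array.from({ length: 50_000 }, (_, i) => (i % 10 === 0 ? "test note" : "other note"));
console.log(capDemo(texts, "test", 200)); // 1000: capped at 5 × limit
console.log(capDemo(texts, "test"));      // 5000: uncapped without a limit
```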
@@ -1,6 +1,5 @@
 "use strict";

-import normalizeString from "normalize-strings";
 import lex from "./lex.js";
 import handleParens from "./handle_parens.js";
 import parse from "./parse.js";

@@ -8,7 +7,7 @@ import SearchResult from "../search_result.js";
 import SearchContext from "../search_context.js";
 import becca from "../../../becca/becca.js";
 import beccaService from "../../../becca/becca_service.js";
-import { normalize, escapeHtml, escapeRegExp } from "../../utils.js";
+import { normalize, removeDiacritic, escapeHtml, escapeRegExp } from "../../utils.js";
 import log from "../../log.js";
 import hoistedNoteService from "../../hoisted_note.js";
 import type BNote from "../../../becca/entities/bnote.js";

@@ -17,7 +16,6 @@ import type { SearchParams, TokenStructure } from "./types.js";
 import type Expression from "../expressions/expression.js";
 import sql from "../../sql.js";
 import scriptService from "../../script.js";
-import striptags from "striptags";
 import protectedSessionService from "../../protected_session.js";

 export interface SearchNoteResult {
@@ -250,23 +248,30 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
         return performSearch(expression, searchContext, false);
     }

+    // For limited searches (e.g. autocomplete), skip the expensive two-phase
+    // fuzzy fallback. The user is typing and will refine their query — exact
+    // matching is sufficient and avoids a second full scan of all notes.
+    if (searchContext.limit) {
+        return performSearch(expression, searchContext, false);
+    }
+
     // Phase 1: Try exact matches first (without fuzzy matching)
     const exactResults = performSearch(expression, searchContext, false);

     // Check if we have sufficient high-quality results
     const minResultThreshold = 5;
     const minScoreForQuality = 10; // Minimum score to consider a result "high quality"

     const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality);

     // If we have enough high-quality exact matches, return them
     if (highQualityResults.length >= minResultThreshold) {
         return exactResults;
     }

     // Phase 2: Add fuzzy matching as fallback when exact matches are insufficient
     const fuzzyResults = performSearch(expression, searchContext, true);

     // Merge results, ensuring exact matches always rank higher than fuzzy matches
     return mergeExactAndFuzzyResults(exactResults, fuzzyResults);
 }
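The quality gate that decides whether the fuzzy phase runs can be isolated in a few lines. A sketch using the thresholds from this hunk and an invented `Result` type (the real code works on `SearchResult` objects via `performSearch`):

```ts
// Sketch of the phase-2 gate: fuzzy only runs when exact results are thin.
interface Result { score: number; }

function needsFuzzyFallback(exactResults: Result[]): boolean {
    const minResultThreshold = 5;   // from this hunk
    const minScoreForQuality = 10;  // from this hunk
    const highQuality = exactResults.filter(r => r.score >= minScoreForQuality);
    return highQuality.length < minResultThreshold;
}

// Four strong exact hits are not enough, so the fuzzy phase would run:
console.log(needsFuzzyFallback([{ score: 12 }, { score: 15 }, { score: 30 }, { score: 11 }])); // true
// Five strong hits short-circuit the second full scan:
console.log(needsFuzzyFallback(Array.from({ length: 5 }, () => ({ score: 10 })))); // false
```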
@@ -448,7 +453,7 @@ function extractContentSnippet(noteId: string, searchTokens: string[], maxLength
     try {
         let content = note.getContent();

         if (!content || typeof content !== "string") {
             return "";
         }
@@ -464,77 +469,66 @@ function extractContentSnippet(noteId: string, searchTokens: string[], maxLength
             return ""; // Protected but no session available
         }

-        // Strip HTML tags for text notes
+        // Strip HTML tags for text notes — use fast regex for snippet extraction
+        // (striptags library is ~18x slower and not needed for search snippets)
         if (note.type === "text") {
-            content = striptags(content);
+            content = content.replace(/<[^>]*>/g, "");
         }

-        // Normalize whitespace while preserving paragraph breaks
-        // First, normalize multiple newlines to double newlines (paragraph breaks)
-        content = content.replace(/\n\s*\n/g, "\n\n");
-        // Then normalize spaces within lines
-        content = content.split('\n').map(line => line.replace(/\s+/g, " ").trim()).join('\n');
-        // Finally trim the whole content
         content = content.trim();

         if (!content) {
             return "";
         }

-        // Try to find a snippet around the first matching token
-        const normalizedContent = normalizeString(content.toLowerCase());
+        // Find match position using normalize on the raw stripped content.
+        // We use a single normalize() pass — no need for expensive whitespace
+        // normalization just to find the match index.
+        const normalizedContent = normalize(content);
+        const normalizedTokens = searchTokens.map(token => normalize(token));
         let snippetStart = 0;
         let matchFound = false;

-        for (const token of searchTokens) {
-            const normalizedToken = normalizeString(token.toLowerCase());
+        for (const normalizedToken of normalizedTokens) {
             const matchIndex = normalizedContent.indexOf(normalizedToken);

             if (matchIndex !== -1) {
                 // Center the snippet around the match
                 snippetStart = Math.max(0, matchIndex - maxLength / 2);
                 matchFound = true;
                 break;
             }
         }

-        // Extract snippet
-        let snippet = content.substring(snippetStart, snippetStart + maxLength);
-
-        // If snippet contains linebreaks, limit to max 4 lines and override character limit
+        // Extract a snippet region from the raw content, then clean only that
+        const snippetRegion = content.substring(snippetStart, snippetStart + maxLength + 100);
+
+        // Normalize whitespace only on the small snippet region
+        let snippet = snippetRegion
+            .replace(/\n\s*\n/g, "\n\n")
+            .replace(/[ \t]+/g, " ")
+            .trim()
+            .substring(0, maxLength);
+
+        // If snippet contains linebreaks, limit to max 4 lines
         const lines = snippet.split('\n');
         if (lines.length > 4) {
             // Find which lines contain the search tokens to ensure they're included
-            const normalizedLines = lines.map(line => normalizeString(line.toLowerCase()));
-            const normalizedTokens = searchTokens.map(token => normalizeString(token.toLowerCase()));

             // Find the first line that contains a search token
             let firstMatchLine = -1;
-            for (let i = 0; i < normalizedLines.length; i++) {
-                if (normalizedTokens.some(token => normalizedLines[i].includes(token))) {
+            for (let i = 0; i < lines.length; i++) {
+                const normalizedLine = normalize(lines[i]);
+                if (normalizedTokens.some(token => normalizedLine.includes(token))) {
                     firstMatchLine = i;
                     break;
                 }
             }

             if (firstMatchLine !== -1) {
                 // Center the 4-line window around the first match
                 // Try to show 1 line before and 2 lines after the match
                 const startLine = Math.max(0, firstMatchLine - 1);
                 const endLine = Math.min(lines.length, startLine + 4);
                 snippet = lines.slice(startLine, endLine).join('\n');
             } else {
                 // No match found in lines (shouldn't happen), just take first 4
                 snippet = lines.slice(0, 4).join('\n');
             }
             // Add ellipsis if we truncated lines
             snippet = snippet + "...";
-        } else if (lines.length > 1) {
-            // For multi-line snippets that are 4 or fewer lines, keep them as-is
-            // No need to truncate
-        } else {
-            // Single line content - apply original word boundary logic
-            // Try to start/end at word boundaries
+        } else if (lines.length <= 1) {
+            // Single line content - apply word boundary logic
             if (snippetStart > 0) {
                 const firstSpace = snippet.search(/\s/);
                 if (firstSpace > 0 && firstSpace < 20) {
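The "~18x slower" figure in the comment is the PR's claim; a quick way to sanity-check it is a micro-benchmark like the one below (assumes the `striptags` package is installed; numbers are machine-dependent). One caveat worth keeping in mind: unlike striptags, the bare regex mishandles a literal `>` inside attribute values, which is acceptable for search snippets but not for general HTML sanitization.

```ts
// Hypothetical micro-benchmark for the striptags → regex swap in this hunk.
import striptags from "striptags";

// ~500 small paragraphs of tag-heavy HTML as a rough stand-in for note content.
const html = Array.from({ length: 500 }, (_, i) => `<p>paragraph ${i} with <b>bold</b> text</p>`).join("\n");

let t0 = performance.now();
for (let i = 0; i < 1_000; i++) striptags(html);
console.log(`striptags: ${(performance.now() - t0).toFixed(1)}ms`);

t0 = performance.now();
for (let i = 0; i < 1_000; i++) html.replace(/<[^>]*>/g, "");
console.log(`regex:     ${(performance.now() - t0).toFixed(1)}ms`);
```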
@@ -542,7 +536,7 @@ function extractContentSnippet(noteId: string, searchTokens: string[], maxLength
             }
             snippet = "..." + snippet;
         }

         if (snippetStart + maxLength < content.length) {
             const lastSpace = snippet.search(/\s[^\s]*$/);
             if (lastSpace > snippet.length - 20 && lastSpace > 0) {
@@ -582,7 +576,7 @@ function extractAttributeSnippet(noteId: string, searchTokens: string[], maxLeng
         // Check if any search token matches the attribute name or value
         const hasMatch = searchTokens.some(token => {
-            const normalizedToken = normalizeString(token.toLowerCase());
+            const normalizedToken = normalize(token);
             return attrName.includes(normalizedToken) || attrValue.includes(normalizedToken);
         });
@@ -650,7 +644,8 @@ function searchNotesForAutocomplete(query: string, fastSearch: boolean = true) {
         includeHiddenNotes: true,
         fuzzyAttributeSearch: true,
         ignoreInternalAttributes: true,
-        ancestorNoteId: hoistedNoteService.isHoistedInHiddenSubtree() ? "root" : hoistedNoteService.getHoistedNoteId()
+        ancestorNoteId: hoistedNoteService.isHoistedInHiddenSubtree() ? "root" : hoistedNoteService.getHoistedNoteId(),
+        limit: 200
     });

     const allSearchResults = findResultsWithQuery(query, searchContext);
@@ -734,7 +729,7 @@ function highlightSearchResults(searchResults: SearchResult[], highlightedTokens
         // Highlight in note path title
         if (result.highlightedNotePathTitle) {
             const titleRegex = new RegExp(escapeRegExp(token), "gi");
-            while ((match = titleRegex.exec(normalizeString(result.highlightedNotePathTitle))) !== null) {
+            while ((match = titleRegex.exec(removeDiacritic(result.highlightedNotePathTitle))) !== null) {
                 result.highlightedNotePathTitle = wrapText(result.highlightedNotePathTitle, match.index, token.length, "{", "}");
                 // 2 characters are added, so we need to adjust the index
                 titleRegex.lastIndex += 2;

@@ -744,7 +739,7 @@ function highlightSearchResults(searchResults: SearchResult[], highlightedTokens
         // Highlight in content snippet
         if (result.highlightedContentSnippet) {
             const contentRegex = new RegExp(escapeRegExp(token), "gi");
-            while ((match = contentRegex.exec(normalizeString(result.highlightedContentSnippet))) !== null) {
+            while ((match = contentRegex.exec(removeDiacritic(result.highlightedContentSnippet))) !== null) {
                 result.highlightedContentSnippet = wrapText(result.highlightedContentSnippet, match.index, token.length, "{", "}");
                 // 2 characters are added, so we need to adjust the index
                 contentRegex.lastIndex += 2;

@@ -754,7 +749,7 @@ function highlightSearchResults(searchResults: SearchResult[], highlightedTokens
         // Highlight in attribute snippet
         if (result.highlightedAttributeSnippet) {
             const attributeRegex = new RegExp(escapeRegExp(token), "gi");
-            while ((match = attributeRegex.exec(normalizeString(result.highlightedAttributeSnippet))) !== null) {
+            while ((match = attributeRegex.exec(removeDiacritic(result.highlightedAttributeSnippet))) !== null) {
                 result.highlightedAttributeSnippet = wrapText(result.highlightedAttributeSnippet, match.index, token.length, "{", "}");
                 // 2 characters are added, so we need to adjust the index
                 attributeRegex.lastIndex += 2;
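The `lastIndex += 2` adjustment exists because each wrap inserts two characters (`{` and `}`) ahead of the regex cursor, so the cursor must be pushed forward to stay aligned with the mutated string. A self-contained sketch with a stand-in `wrapText` (the real helper is not shown in this diff):

```ts
// Stand-in for the wrapText helper: insert open/close markers around a span.
function wrapText(text: string, start: number, length: number, open: string, close: string): string {
    return text.substring(0, start) + open + text.substring(start, start + length) + close + text.substring(start + length);
}

let title = "test and test again";
const regex = /test/gi;
let match: RegExpExecArray | null;
while ((match = regex.exec(title)) !== null) {
    title = wrapText(title, match.index, match[0].length, "{", "}");
    regex.lastIndex += 2; // skip past the two inserted characters
}
console.log(title); // "{test} and {test} again"
```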
@@ -0,0 +1,526 @@
/**
 * Search performance profiling tests.
 *
 * These tests measure where time is spent in the search pipeline.
 * We monkeypatch note.getContent() to return synthetic HTML content
 * since unit tests don't have a real SQLite database.
 *
 * KNOWN GAPS vs production:
 * - note.getContent() is instant (monkeypatched) vs ~2ms SQL fetch
 * - NoteContentFulltextExp.execute() is skipped (no sql.iterateRows)
 *   because fastSearch=true uses only NoteFlatTextExp
 * - These tests focus on the in-memory/CPU-bound parts of the pipeline
 */
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import searchService from "./search.js";
import BNote from "../../../becca/entities/bnote.js";
import BBranch from "../../../becca/entities/bbranch.js";
import SearchContext from "../search_context.js";
import becca from "../../../becca/becca.js";
import beccaService from "../../../becca/becca_service.js";
import { NoteBuilder, note, id } from "../../../test/becca_mocking.js";
import SearchResult from "../search_result.js";
import { normalizeSearchText } from "../utils/text_utils.js";

// ── helpers ──────────────────────────────────────────────────────────

function randomWord(len = 6): string {
    const chars = "abcdefghijklmnopqrstuvwxyz";
    let word = "";
    for (let i = 0; i < len; i++) {
        word += chars[Math.floor(Math.random() * chars.length)];
    }
    return word;
}

function generateHtmlContent(wordCount: number, includeTarget = false): string {
    const paragraphs: string[] = [];
    let wordsRemaining = wordCount;

    while (wordsRemaining > 0) {
        const paraWords = Math.min(wordsRemaining, 20 + Math.floor(Math.random() * 40));
        const words: string[] = [];
        for (let i = 0; i < paraWords; i++) {
            words.push(randomWord(3 + Math.floor(Math.random() * 10)));
        }
        if (includeTarget && paragraphs.length === 2) {
            words[Math.floor(words.length / 2)] = "target";
        }
        paragraphs.push(`<p>${words.join(" ")}</p>`);
        wordsRemaining -= paraWords;
    }

    return `<html><body>${paragraphs.join("\n")}</body></html>`;
}

function timed<T>(fn: () => T): [T, number] {
    const start = performance.now();
    const result = fn();
    return [result, performance.now() - start];
}

interface TimingEntry { label: string; ms: number; }

function reportTimings(title: string, timings: TimingEntry[]) {
    const total = timings.reduce((s, t) => s + t.ms, 0);
    console.log(`\n=== ${title} (total: ${total.toFixed(1)}ms) ===`);
    for (const { label, ms } of timings) {
        const pct = total > 0 ? ((ms / total) * 100).toFixed(0) : "0";
        const bar = "#".repeat(Math.max(1, Math.round(ms / total * 40)));
        console.log(`  ${label.padEnd(55)} ${ms.toFixed(1).padStart(8)}ms ${pct.padStart(3)}% ${bar}`);
    }
}

// ── dataset builder ──────────────────────────────────────────────────

const syntheticContent: Record<string, string> = {};

function buildDataset(noteCount: number, opts: {
    matchFraction?: number;
    labelsPerNote?: number;
    depth?: number;
    contentWordCount?: number;
} = {}) {
    const {
        matchFraction = 0.1,
        labelsPerNote = 3,
        depth = 3,
        contentWordCount = 200,
    } = opts;

    becca.reset();
    for (const key of Object.keys(syntheticContent)) {
        delete syntheticContent[key];
    }

    const rootNote = new NoteBuilder(new BNote({ noteId: "root", title: "root", type: "text" }));
    new BBranch({
        branchId: "none_root",
        noteId: "root",
        parentNoteId: "none",
        notePosition: 10
    });

    const containers: NoteBuilder[] = [];
    let parent = rootNote;
    for (let d = 0; d < depth; d++) {
        const container = note(`Container_${d}_${randomWord(4)}`);
        parent.child(container);
        containers.push(container);
        parent = container;
    }

    const matchCount = Math.floor(noteCount * matchFraction);

    for (let i = 0; i < noteCount; i++) {
        const isMatch = i < matchCount;
        const title = isMatch
            ? `${randomWord(5)} target ${randomWord(5)} Document ${i}`
            : `${randomWord(5)} ${randomWord(6)} ${randomWord(4)} Note ${i}`;

        const n = note(title);

        for (let l = 0; l < labelsPerNote; l++) {
            const labelName = isMatch && l === 0 ? "category" : `label_${randomWord(4)}`;
            const labelValue = isMatch && l === 0 ? "important target" : randomWord(8);
            n.label(labelName, labelValue);
        }

        syntheticContent[n.note.noteId] = generateHtmlContent(contentWordCount, isMatch);

        const containerIndex = i % containers.length;
        containers[containerIndex].child(n);
    }

    // Monkeypatch getContent()
    for (const noteObj of Object.values(becca.notes)) {
        const noteId = noteObj.noteId;
        if (syntheticContent[noteId]) {
            (noteObj as any).getContent = () => syntheticContent[noteId];
        } else {
            (noteObj as any).getContent = () => "";
        }
    }

    return { rootNote, matchCount };
}

// ── profiling tests ──────────────────────────────────────────────────

describe("Search Profiling", () => {

    afterEach(() => {
        becca.reset();
    });

    /**
     * Break down the autocomplete pipeline into every individual stage,
     * including previously unmeasured operations like getBestNotePath,
     * SearchResult construction, and getNoteTitleForPath.
     */
    describe("Granular autocomplete pipeline", () => {

        for (const noteCount of [500, 2000, 5000, 10000]) {
            it(`granular breakdown with ${noteCount} notes`, () => {
                const timings: TimingEntry[] = [];

                const [, buildMs] = timed(() => buildDataset(noteCount, {
                    matchFraction: 0.2,
                    contentWordCount: 300,
                    depth: 5
                }));
                timings.push({ label: `Dataset build (${noteCount} notes)`, ms: buildMs });

                // === NoteFlatTextExp: getCandidateNotes ===
                // This calls getFlatText() + normalizeSearchText() for EVERY note
                const allNotes = Object.values(becca.notes);
                for (const n of allNotes) n.invalidateThisCache();

                const [, candidateMs] = timed(() => {
                    const token = normalizeSearchText("target");
                    let count = 0;
                    for (const n of allNotes) {
                        const flatText = normalizeSearchText(n.getFlatText());
                        if (flatText.includes(token)) count++;
                    }
                    return count;
                });
                timings.push({ label: `getCandidateNotes simulation (cold caches)`, ms: candidateMs });

                // Warm cache version
                const [candidateCount, candidateWarmMs] = timed(() => {
                    const token = normalizeSearchText("target");
                    let count = 0;
                    for (const n of allNotes) {
                        const flatText = normalizeSearchText(n.getFlatText());
                        if (flatText.includes(token)) count++;
                    }
                    return count;
                });
                timings.push({ label: `getCandidateNotes simulation (warm caches)`, ms: candidateWarmMs });

                // === getBestNotePath for each candidate ===
                const candidates = allNotes.filter(n => {
                    const flatText = normalizeSearchText(n.getFlatText());
                    return flatText.includes("target");
                });

                const [, pathMs] = timed(() => {
                    for (const n of candidates) {
                        n.getBestNotePath();
                    }
                });
                timings.push({ label: `getBestNotePath (${candidates.length} notes)`, ms: pathMs });

                // === SearchResult construction (includes getNoteTitleForPath) ===
                const paths = candidates.map(n => n.getBestNotePath()).filter(Boolean);

                const [searchResults, srMs] = timed(() => {
                    return paths.map(p => new SearchResult(p));
                });
                timings.push({ label: `SearchResult construction (${paths.length} results)`, ms: srMs });

                // === computeScore ===
                const [, scoreMs] = timed(() => {
                    for (const r of searchResults) {
                        r.computeScore("target", ["target"], true);
                    }
                });
                timings.push({ label: `computeScore with fuzzy (${searchResults.length} results)`, ms: scoreMs });

                const [, scoreNoFuzzyMs] = timed(() => {
                    for (const r of searchResults) {
                        r.computeScore("target", ["target"], false);
                    }
                });
                timings.push({ label: `computeScore no-fuzzy`, ms: scoreNoFuzzyMs });

                // === Sorting ===
                const [, sortMs] = timed(() => {
                    searchResults.sort((a, b) => {
                        if (a.score !== b.score) return b.score - a.score;
                        if (a.notePathArray.length === b.notePathArray.length) {
                            return a.notePathTitle < b.notePathTitle ? -1 : 1;
                        }
                        return a.notePathArray.length - b.notePathArray.length;
                    });
                });
                timings.push({ label: `Sort results`, ms: sortMs });

                // === Trim + content snippet extraction ===
                const trimmed = searchResults.slice(0, 200);

                const [, snippetMs] = timed(() => {
                    for (const r of trimmed) {
                        r.contentSnippet = searchService.extractContentSnippet(
                            r.noteId, ["target"]
                        );
                    }
                });
                timings.push({ label: `Content snippet extraction (${trimmed.length} results)`, ms: snippetMs });

                const [, attrMs] = timed(() => {
                    for (const r of trimmed) {
                        r.attributeSnippet = searchService.extractAttributeSnippet(
                            r.noteId, ["target"]
                        );
                    }
                });
                timings.push({ label: `Attribute snippet extraction`, ms: attrMs });

                // === Highlighting ===
                const [, hlMs] = timed(() => {
                    searchService.highlightSearchResults(trimmed, ["target"]);
                });
                timings.push({ label: `Highlighting`, ms: hlMs });

                // === Final mapping (getNoteTitleAndIcon) ===
                const [, mapMs] = timed(() => {
                    for (const r of trimmed) {
                        beccaService.getNoteTitleAndIcon(r.noteId);
                    }
                });
                timings.push({ label: `getNoteTitleAndIcon (${trimmed.length} results)`, ms: mapMs });

                // === Full autocomplete for comparison ===
                const [autoResults, autoMs] = timed(() => {
                    return searchService.searchNotesForAutocomplete("target", true);
                });
                timings.push({ label: `Full autocomplete call (end-to-end)`, ms: autoMs });

                reportTimings(`Granular Autocomplete — ${noteCount} notes`, timings);
                expect(autoResults.length).toBeGreaterThan(0);
            });
        }
    });

    /**
     * Test the specific cost of normalizeSearchText which is called
     * pervasively throughout the pipeline.
     */
    describe("normalizeSearchText cost", () => {

        it("profile normalizeSearchText at scale", () => {
            buildDataset(5000, { matchFraction: 0.2, contentWordCount: 100 });

            // Generate various text lengths to profile
            const shortTexts = Array.from({ length: 5000 }, () => randomWord(10));
            const mediumTexts = Array.from({ length: 5000 }, () =>
                Array.from({ length: 20 }, () => randomWord(6)).join(" ")
            );
            const longTexts = Object.values(becca.notes).map(n => n.getFlatText());

            console.log("\n=== normalizeSearchText cost ===");

            const [, shortMs] = timed(() => {
                for (const t of shortTexts) normalizeSearchText(t);
            });
            console.log(`  5000 short texts (10 chars): ${shortMs.toFixed(1)}ms (${(shortMs / 5000 * 1000).toFixed(1)}µs/call)`);

            const [, medMs] = timed(() => {
                for (const t of mediumTexts) normalizeSearchText(t);
            });
            console.log(`  5000 medium texts (120 chars): ${medMs.toFixed(1)}ms (${(medMs / 5000 * 1000).toFixed(1)}µs/call)`);

            const [, longMs] = timed(() => {
                for (const t of longTexts) normalizeSearchText(t);
            });
            console.log(`  ${longTexts.length} flat texts (varying): ${longMs.toFixed(1)}ms (${(longMs / longTexts.length * 1000).toFixed(1)}µs/call)`);
        });
    });

    /**
     * Test the searchPathTowardsRoot recursive walk which runs
     * for every candidate note in NoteFlatTextExp.
     */
    describe("searchPathTowardsRoot cost", () => {

        it("profile recursive walk with varying hierarchy depth", () => {
            console.log("\n=== Search path walk vs hierarchy depth ===");

            for (const depth of [3, 5, 8, 12]) {
                buildDataset(2000, {
                    matchFraction: 0.15,
                    depth,
                    contentWordCount: 50
                });

                const [results, ms] = timed(() => {
                    const ctx = new SearchContext({ fastSearch: true });
                    return searchService.findResultsWithQuery("target", ctx);
                });
                console.log(`  depth=${depth}: ${ms.toFixed(1)}ms (${results.length} results)`);
            }
        });
    });

    /**
     * Content snippet extraction scaling — the operation that calls
     * note.getContent() for each result.
     */
    describe("Content snippet extraction", () => {

        it("profile snippet extraction with varying content sizes", () => {
            console.log("\n=== Content snippet extraction vs content size ===");

            for (const wordCount of [50, 200, 500, 1000, 2000, 5000]) {
                buildDataset(500, {
                    matchFraction: 0.5,
                    contentWordCount: wordCount
                });

                const ctx = new SearchContext({ fastSearch: true });
                const results = searchService.findResultsWithQuery("target", ctx);
                const trimmed = results.slice(0, 200);

                const [, ms] = timed(() => {
                    for (const r of trimmed) {
                        r.contentSnippet = searchService.extractContentSnippet(
                            r.noteId, ["target"]
                        );
                    }
                });

                const avgContentLen = Object.values(syntheticContent)
                    .slice(0, 100)
                    .reduce((s, c) => s + c.length, 0) / 100;

                console.log(`  ${String(wordCount).padStart(5)} words/note (avg ${Math.round(avgContentLen)} chars) × ${trimmed.length} results: ${ms.toFixed(1)}ms (${(ms / trimmed.length).toFixed(3)}ms/note)`);
            }
        });

        it("profile snippet extraction with varying result counts", () => {
            console.log("\n=== Content snippet extraction vs result count ===");

            buildDataset(2000, {
                matchFraction: 0.5,
                contentWordCount: 500
            });

            const ctx = new SearchContext({ fastSearch: true });
            const allResults = searchService.findResultsWithQuery("target", ctx);

            for (const count of [5, 10, 20, 50, 100, 200]) {
                const subset = allResults.slice(0, count);

                const [, ms] = timed(() => {
                    for (const r of subset) {
                        r.contentSnippet = searchService.extractContentSnippet(
                            r.noteId, ["target"]
                        );
                    }
                });

                console.log(`  ${String(count).padStart(3)} results: ${ms.toFixed(1)}ms (${(ms / count).toFixed(3)}ms/note)`);
            }
        });
    });

    /**
     * Two-phase exact/fuzzy search cost.
     */
    describe("Two-phase search cost", () => {

        for (const noteCount of [1000, 5000, 10000]) {
            it(`exact vs progressive with ${noteCount} notes`, () => {
                const timings: TimingEntry[] = [];

                buildDataset(noteCount, { matchFraction: 0.005, contentWordCount: 50 });

                const [exactR, exactMs] = timed(() => {
                    const ctx = new SearchContext({ fastSearch: true });
                    ctx.enableFuzzyMatching = false;
                    return searchService.findResultsWithQuery("target", ctx);
                });
                timings.push({ label: `Exact-only (${exactR.length} results)`, ms: exactMs });

                const [progR, progMs] = timed(() => {
                    const ctx = new SearchContext({ fastSearch: true });
                    return searchService.findResultsWithQuery("target", ctx);
                });
                timings.push({ label: `Progressive exact→fuzzy (${progR.length} results)`, ms: progMs });

                const overhead = progMs - exactMs;
                timings.push({ label: `Fuzzy phase overhead`, ms: Math.max(0, overhead) });

                reportTimings(`Two-phase — ${noteCount} notes`, timings);
            });
        }
    });

    /**
     * End-to-end scaling to give the full picture.
     */
    describe("End-to-end scaling", () => {

        it("autocomplete at different scales", () => {
            console.log("\n=== End-to-end autocomplete scaling ===");
            console.log("  (fastSearch=true, monkeypatched getContent, no real SQL)");

            for (const noteCount of [100, 500, 1000, 2000, 5000, 10000, 20000]) {
                buildDataset(noteCount, {
                    matchFraction: 0.2,
                    contentWordCount: 300,
                    depth: 4
                });

                // Warm up
                searchService.searchNotesForAutocomplete("target", true);

                const times: number[] = [];
                for (let i = 0; i < 3; i++) {
                    const [, ms] = timed(() => searchService.searchNotesForAutocomplete("target", true));
                    times.push(ms);
                }

                const avg = times.reduce((a, b) => a + b, 0) / times.length;
                const min = Math.min(...times);

                console.log(
                    `  ${String(noteCount).padStart(6)} notes: avg ${avg.toFixed(1)}ms ` +
                    `min ${min.toFixed(1)}ms`
                );
            }
        });

        it("compare fast vs non-fast search", () => {
            console.log("\n=== Fast vs non-fast search (no real SQL for content) ===");

            for (const noteCount of [500, 2000, 5000]) {
                buildDataset(noteCount, {
                    matchFraction: 0.2,
                    contentWordCount: 200,
                    depth: 4
                });

                const [, fastMs] = timed(() => {
                    const ctx = new SearchContext({ fastSearch: true });
                    return searchService.findResultsWithQuery("target", ctx);
                });

                // Non-fast search tries NoteContentFulltextExp which uses sql.iterateRows.
                // This will likely fail/return empty since there's no real DB, but we
                // can still measure the overhead of attempting it.
                let nonFastMs: number;
                let nonFastCount: number;
                try {
                    const [results, ms] = timed(() => {
                        const ctx = new SearchContext({ fastSearch: false });
                        return searchService.findResultsWithQuery("target", ctx);
                    });
                    nonFastMs = ms;
                    nonFastCount = results.length;
                } catch {
                    nonFastMs = -1;
                    nonFastCount = -1;
                }

                console.log(
                    `  ${String(noteCount).padStart(5)} notes: fast=${fastMs.toFixed(1)}ms ` +
                    `non-fast=${nonFastMs >= 0 ? nonFastMs.toFixed(1) + 'ms' : 'FAILED (no real DB)'} ` +
                    `(non-fast results: ${nonFastCount})`
                );
            }
        });
    });
});