feat(search): also support the use of ="exact match search string"

2025-11-13 00:35:50 +01:00 · 2025-10-10 12:23:57 -07:00
parent 50f0b88eff
commit 4fa4112840
5 changed files with 302 additions and 21 deletions
--- a/apps/server/src/services/search/expressions/note_content_fulltext.ts
+++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts
@@ -77,15 +77,43 @@ class NoteContentFulltextExp extends Expression {

        const resultNoteSet = new NoteSet();

+        // Search through notes with content
        for (const row of sql.iterateRows<SearchRow>(`
                SELECT noteId, type, mime, content, isProtected
                FROM notes JOIN blobs USING (blobId)
-                WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') 
-                  AND isDeleted = 0 
+                WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
+                  AND isDeleted = 0
                  AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
            this.findInText(row, inputNoteSet, resultNoteSet);
        }

+        // Second pass for = / != with flatText: walks every input note that is not yet in resultNoteSet (not only notes without content)
+        if (this.flatText && (this.operator === "=" || this.operator === "!=")) {
+            for (const noteId of inputNoteSet.noteIdSet) {
+                // Skip notes already in the result set or missing from becca
+                if (resultNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
+                    continue;
+                }
+
+                const note = becca.notes[noteId];
+                const flatText = note.getFlatText();
+
+                // Attribute values are expected to appear in flatText as #name=value or ~name=value, so only "=phrase" is searched for.
+                // The search runs over the whole normalized flatText; nothing here limits it to the attribute part.
+                let matches = false;
+                const phrase = this.tokens.join(" ");
+                const normalizedPhrase = normalizeSearchText(phrase);
+                const normalizedFlatText = normalizeSearchText(flatText);
+                // NOTE(review): with "!=", a note that findInText left out because its content contains the phrase is visited
+                // again here and added whenever flatText lacks "=phrase" - confirm this is intended.
+                matches = normalizedFlatText.includes(`=${normalizedPhrase}`);
+
+                if ((this.operator === "=" && matches) || (this.operator === "!=" && !matches)) {
+                    resultNoteSet.add(note);
+                }
+            }
+        }
+
        return resultNoteSet;
    }

@@ -103,6 +131,32 @@ class NoteContentFulltextExp extends Expression {
        return words.some(word => word === normalizedToken);
    }

+    /**
+     * Returns true when content contains the tokens joined by single spaces, as a plain substring (not word-bounded).
+     * Tokens and content are each passed through normalizeSearchText here first; checkFlatTextAttributes defaults to false.
+     */
+    private containsExactPhrase(tokens: string[], content: string, checkFlatTextAttributes: boolean = false): boolean {
+        const normalizedTokens = tokens.map(t => normalizeSearchText(t));
+        const normalizedContent = normalizeSearchText(content);
+
+        // Join tokens with single space to form the phrase
+        const phrase = normalizedTokens.join(" ");
+
+        // Plain substring test, so the phrase can also be found in the middle of longer words
+        if (normalizedContent.includes(phrase)) {
+            return true;
+        }
+
+        // Intended for flatText, where attribute values are said to appear as "#name=value" or "~name=value".
+        // NOTE(review): any text that contains "=" followed by the phrase also contains the phrase itself, so the check
+        // above has already returned true; this branch cannot change the result and the flag is effectively a no-op.
+        if (checkFlatTextAttributes && normalizedContent.includes(`=${phrase}`)) {
+            return true;
+        }
+
+        return false;
+    }
+
    findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) {
        if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
            return;
@@ -137,9 +191,25 @@ class NoteContentFulltextExp extends Expression {
        if (this.tokens.length === 1) {
            const [token] = this.tokens;

+            let matches = false;
+            if (this.operator === "=") {
+                matches = this.containsExactWord(token, content);
+                // flatText fallback. NOTE(review): content is checked with containsExactWord, but containsExactPhrase is a substring test, so the token may match inside a longer word here
+                if (!matches && this.flatText) {
+                    const flatText = becca.notes[noteId].getFlatText();
+                    matches = this.containsExactPhrase([token], flatText, true);
+                }
+            } else if (this.operator === "!=") {
+                matches = !this.containsExactWord(token, content);
+                // For negation the token must be absent from both the content and, when flatText is enabled, the flat text
+                if (matches && this.flatText) {
+                    const flatText = becca.notes[noteId].getFlatText();
+                    matches = !this.containsExactPhrase([token], flatText, true);
+                }
+            }
+
            if (
-                (this.operator === "=" && this.containsExactWord(token, content)) ||
-                (this.operator === "!=" && !this.containsExactWord(token, content)) ||
+                matches ||
                (this.operator === "*=" && content.endsWith(token)) ||
                (this.operator === "=*" && content.startsWith(token)) ||
                (this.operator === "*=*" && content.includes(token)) ||
@@ -152,10 +222,26 @@ class NoteContentFulltextExp extends Expression {
        } else {
            // Multi-token matching with fuzzy support and phrase proximity
            if (this.operator === "~=" || this.operator === "~*") {
+                // Fuzzy phrase matching
                if (this.matchesWithFuzzy(content, noteId)) {
                    resultNoteSet.add(becca.notes[noteId]);
                }
+            } else if (this.operator === "=" || this.operator === "!=") {
+                // Exact phrase matching for = and !=: first look for the tokens, joined by single spaces, in the note content
+                let matches = this.containsExactPhrase(this.tokens, content, false);
+
+                // If not found there and flatText is enabled, look in the flat text of the note as well
+                if (!matches && this.flatText) {
+                    const flatText = becca.notes[noteId].getFlatText();
+                    matches = this.containsExactPhrase(this.tokens, flatText, true);
+                }
+                // "=" keeps the note when the phrase was found in either place; "!=" keeps it only when it was found in neither
+                if ((this.operator === "=" && matches) ||
+                    (this.operator === "!=" && !matches)) {
+                    resultNoteSet.add(becca.notes[noteId]);
+                }
            } else {
+                // Other operators: check all tokens present (any order)
                const nonMatchingToken = this.tokens.find(
                    (token) =>
                        !this.tokenMatchesContent(token, content, noteId)
--- a/apps/server/src/services/search/services/build_comparator.ts
+++ b/apps/server/src/services/search/services/build_comparator.ts
@@ -14,24 +14,35 @@ type Comparator<T> = (comparedValue: T) => (val: string) => boolean;

 const stringComparators: Record<string, Comparator<string>> = {
    "=": (comparedValue) => (val) => {
-        // For the = operator, check if the value contains the exact word (word-boundary matching)
+        // For the = operator, check if the value contains the exact word or phrase
        // This is case-insensitive since both values are already lowercased
        if (!val) return false;

        const normalizedVal = normalizeSearchText(val);
        const normalizedCompared = normalizeSearchText(comparedValue);

-        // Split into words and check for exact match
+        // If the normalized comparedValue contains a space, treat it as a phrase: val matches when it contains that phrase as a substring (not word-bounded)
+        if (normalizedCompared.includes(" ")) {
+            return normalizedVal.includes(normalizedCompared);
+        }
+
+        // For single word, split into words and check for exact match
        const words = normalizedVal.split(/\s+/);
        return words.some(word => word === normalizedCompared);
    },
    "!=": (comparedValue) => (val) => {
-        // Negation of exact word match
+        // Negation of exact word/phrase match
        if (!val) return true;

        const normalizedVal = normalizeSearchText(val);
        const normalizedCompared = normalizeSearchText(comparedValue);

+        // If the normalized comparedValue contains a space, treat it as a phrase: val passes only when it does not contain that phrase as a substring
+        if (normalizedCompared.includes(" ")) {
+            return !normalizedVal.includes(normalizedCompared);
+        }
+
+        // For single word, split into words and check for exact match
        const words = normalizedVal.split(/\s+/);
        return !words.some(word => word === normalizedCompared);
    },
--- a/apps/server/src/services/search/services/parse.ts
+++ b/apps/server/src/services/search/services/parse.ts
@@ -38,11 +38,14 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading

    if (!searchContext.fastSearch) {
        // For exact match with "=", we need different behavior
-        if (leadingOperator === "=" && tokens.length === 1) {
-            // Exact match on title OR exact match on content
+        if (leadingOperator === "=" && tokens.length >= 1) {
+            // Exact match on title OR on content OR in flat text. NOTE(review): the flatText: true expression also checks content first, so the flatText: false one looks redundant - confirm.
+            // For multi-word, join tokens with space to form exact phrase
+            const titleSearchValue = tokens.join(" ");
            return new OrExp([
-                new PropertyComparisonExp(searchContext, "title", "=", tokens[0]),
-                new NoteContentFulltextExp("=", { tokens, flatText: false })
+                new PropertyComparisonExp(searchContext, "title", "=", titleSearchValue),
+                new NoteContentFulltextExp("=", { tokens, flatText: false }),
+                new NoteContentFulltextExp("=", { tokens, flatText: true })
            ]);
        }
        return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]);
--- a/apps/server/src/services/search/services/search.spec.ts
+++ b/apps/server/src/services/search/services/search.spec.ts
@@ -304,10 +304,13 @@ describe("Search", () => {

        const searchContext = new SearchContext();

-        // Test 1: With = and quotes, treat as multi-word exact match (both words must match)
+        // Test 1: With = and quotes, treat as exact phrase match (consecutive words in order)
        let searchResults = searchService.findResultsWithQuery("='exact phrase'", searchContext);
-        // With current implementation, this searches for notes containing both "exact" and "phrase" words
-        expect(searchResults.length).toEqual(4); // All notes with both words
+        // Should match only notes containing the exact phrase "exact phrase"
+        expect(searchResults.length).toEqual(3); // Only notes with consecutive "exact phrase"
+        expect(findNoteByTitle(searchResults, "exact phrase")).toBeTruthy();
+        expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy();
+        expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy();

        // Test 2: Without =, quoted phrase should find substring/contains matches
        searchResults = searchService.findResultsWithQuery("'exact phrase'", searchContext);
@@ -316,9 +319,10 @@ describe("Search", () => {
        expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy();
        expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy();

-        // Test 3: Verify word order doesn't matter with exact word matching
+        // Test 3: Verify word order matters with exact phrase matching
        searchResults = searchService.findResultsWithQuery("='phrase exact'", searchContext);
-        expect(searchResults.length).toEqual(4); // All notes with both words
+        expect(searchResults.length).toEqual(1); // Only "phrase exact" matches
+        expect(findNoteByTitle(searchResults, "phrase exact")).toBeTruthy();
    });

    it("leading = operator case sensitivity", () => {
@@ -368,15 +372,15 @@ describe("Search", () => {
        expect(findNoteByTitle(searchResults, "test.note")).toBeTruthy();

        // For phrases with spaces, use quotes to keep them together
-        // With word-boundary matching, this finds all notes with both words
+        // With exact phrase matching, this finds notes with the consecutive phrase
        searchResults = searchService.findResultsWithQuery("='test note'", searchContext);
-        expect(searchResults.length).toEqual(1); // Only "test note" has both words as separate tokens
+        expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase
        expect(findNoteByTitle(searchResults, "test note")).toBeTruthy();

-        // Without quotes, "test note" is tokenized as two separate words
-        // and will match all notes containing both "test" AND "note" words
+        // Without quotes, "test note" is tokenized as two separate tokens
+        // and will be treated as an exact phrase search with = operator
        searchResults = searchService.findResultsWithQuery("=test note", searchContext);
-        expect(searchResults.length).toEqual(1); // Only "test note" has both as separate words
+        expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase

        // Without =, should find all matches containing "test" substring
        searchResults = searchService.findResultsWithQuery("test", searchContext);