mirror of
https://github.com/zadam/trilium.git
synced 2025-10-30 01:36:24 +01:00
Compare commits
3 Commits
v0.99.2
...
feat/quick
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
912bc61730 | ||
|
|
93e8459d4b | ||
|
|
6c26fa709e |
@@ -120,6 +120,74 @@ class NoteContentFulltextExp extends Expression {
|
|||||||
}
|
}
|
||||||
content = processedContent;
|
content = processedContent;
|
||||||
|
|
||||||
|
// Check note size and determine search strategy
|
||||||
|
const contentSize = content.length;
|
||||||
|
const isExtremeNote = contentSize > FUZZY_SEARCH_CONFIG.EXTREME_NOTE_SIZE_THRESHOLD;
|
||||||
|
const isLargeNote = contentSize > FUZZY_SEARCH_CONFIG.LARGE_NOTE_SIZE_THRESHOLD;
|
||||||
|
const isFuzzyOperator = this.operator === "~=" || this.operator === "~*";
|
||||||
|
|
||||||
|
// For extremely large notes (>5MB), only search title regardless of operator
|
||||||
|
if (isExtremeNote) {
|
||||||
|
const note = becca.notes[noteId];
|
||||||
|
const title = note.title || "";
|
||||||
|
|
||||||
|
log.info(`Note ${noteId} is ${(contentSize / (1024 * 1024)).toFixed(1)}MB - searching title only due to extreme size`);
|
||||||
|
|
||||||
|
// For fuzzy operators, use fuzzy matching on title
|
||||||
|
// For other operators, use exact/wildcard matching on title
|
||||||
|
const normalizedTitle = normalizeSearchText(title);
|
||||||
|
let titleMatches = false;
|
||||||
|
|
||||||
|
if (isFuzzyOperator) {
|
||||||
|
titleMatches = this.tokens.some(token =>
|
||||||
|
this.fuzzyMatchToken(normalizeSearchText(token), normalizedTitle)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// Apply the operator to title matching
|
||||||
|
titleMatches = this.tokens.every(token => {
|
||||||
|
const normalizedToken = normalizeSearchText(token);
|
||||||
|
if (this.operator === "*=*") return normalizedTitle.includes(normalizedToken);
|
||||||
|
if (this.operator === "=") return normalizedTitle === normalizedToken;
|
||||||
|
if (this.operator === "!=") return normalizedTitle !== normalizedToken;
|
||||||
|
if (this.operator === "*=") return normalizedTitle.endsWith(normalizedToken);
|
||||||
|
if (this.operator === "=*") return normalizedTitle.startsWith(normalizedToken);
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (titleMatches) {
|
||||||
|
resultNoteSet.add(becca.notes[noteId]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For large notes (250KB-5MB) with fuzzy operators, use optimized strategy
|
||||||
|
if (isLargeNote && isFuzzyOperator) {
|
||||||
|
const note = becca.notes[noteId];
|
||||||
|
const title = note.title || "";
|
||||||
|
|
||||||
|
log.info(`Note ${noteId} is ${(contentSize / 1024).toFixed(1)}KB - using optimized search (fuzzy on title, exact on content)`);
|
||||||
|
|
||||||
|
// Perform fuzzy search on title (note: only the first token is fuzzy-matched against the title)
|
||||||
|
const titleMatches = this.fuzzyMatchToken(normalizeSearchText(this.tokens[0]), normalizeSearchText(title));
|
||||||
|
|
||||||
|
// Perform exact match on content for all tokens
|
||||||
|
const contentMatches = this.tokens.every(token => {
|
||||||
|
const normalizedToken = normalizeSearchText(token);
|
||||||
|
const normalizedContent = normalizeSearchText(content);
|
||||||
|
return normalizedContent.includes(normalizedToken);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add to results if either the title fuzzy-matches or the content contains every token exactly
|
||||||
|
if (titleMatches || contentMatches) {
|
||||||
|
resultNoteSet.add(becca.notes[noteId]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Standard search logic for non-large notes or non-fuzzy operators
|
||||||
if (this.tokens.length === 1) {
|
if (this.tokens.length === 1) {
|
||||||
const [token] = this.tokens;
|
const [token] = this.tokens;
|
||||||
|
|
||||||
@@ -250,11 +318,6 @@ class NoteContentFulltextExp extends Expression {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warn about large word counts but still attempt matching
|
|
||||||
if (words.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
|
|
||||||
console.info(`Large word count for phrase matching: ${words.length} words - may take longer but will attempt full matching`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find positions of each token
|
// Find positions of each token
|
||||||
const tokenPositions: number[][] = this.tokens.map(token => {
|
const tokenPositions: number[][] = this.tokens.map(token => {
|
||||||
const normalizedToken = normalizeSearchText(token);
|
const normalizedToken = normalizeSearchText(token);
|
||||||
|
|||||||
@@ -14,15 +14,13 @@ export const FUZZY_SEARCH_CONFIG = {
|
|||||||
MAX_EDIT_DISTANCE: 2,
|
MAX_EDIT_DISTANCE: 2,
|
||||||
// Maximum proximity distance for phrase matching (in words)
|
// Maximum proximity distance for phrase matching (in words)
|
||||||
MAX_PHRASE_PROXIMITY: 10,
|
MAX_PHRASE_PROXIMITY: 10,
|
||||||
|
// Large note threshold - above this, fuzzy operators use an optimized search strategy (fuzzy match on title, exact match on content)
|
||||||
|
LARGE_NOTE_SIZE_THRESHOLD: 250000, // 250KB - switch to title-only fuzzy for performance
|
||||||
|
// Extreme note threshold - above this, skip content search entirely
|
||||||
|
EXTREME_NOTE_SIZE_THRESHOLD: 5 * 1024 * 1024, // 5MB - title search only
|
||||||
// Absolute hard limits for extreme cases - only to prevent system crashes
|
// Absolute hard limits for extreme cases - only to prevent system crashes
|
||||||
ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
|
ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
|
||||||
ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing
|
ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing
|
||||||
// Performance warning thresholds - inform user but still attempt search
|
|
||||||
PERFORMANCE_WARNING_SIZE: 5 * 1024 * 1024, // 5MB - warn about potential performance impact
|
|
||||||
PERFORMANCE_WARNING_WORDS: 100000, // 100K words - warn about word count impact
|
|
||||||
// Progressive processing thresholds for very large content
|
|
||||||
PROGRESSIVE_PROCESSING_SIZE: 10 * 1024 * 1024, // 10MB - use progressive processing
|
|
||||||
PROGRESSIVE_PROCESSING_WORDS: 500000, // 500K words - use progressive processing
|
|
||||||
// Performance thresholds
|
// Performance thresholds
|
||||||
EARLY_TERMINATION_THRESHOLD: 3,
|
EARLY_TERMINATION_THRESHOLD: 3,
|
||||||
} as const;
|
} as const;
|
||||||
@@ -204,7 +202,8 @@ export function validateFuzzySearchTokens(tokens: string[], operator: string): {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates and preprocesses content for search operations.
|
* Validates and preprocesses content for search operations.
|
||||||
* Philosophy: Try to search everything! Only block truly extreme cases that could crash the system.
|
* Only blocks truly extreme cases that could crash the system.
|
||||||
|
* Large notes (content length above LARGE_NOTE_SIZE_THRESHOLD, 250KB) are handled with an optimized search strategy instead.
|
||||||
*
|
*
|
||||||
* @param content The content to validate and preprocess
|
* @param content The content to validate and preprocess
|
||||||
* @param noteId The note ID (for logging purposes)
|
* @param noteId The note ID (for logging purposes)
|
||||||
@@ -222,12 +221,7 @@ export function validateAndPreprocessContent(content: string, noteId?: string):
|
|||||||
return content.substring(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_CONTENT_SIZE);
|
return content.substring(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_CONTENT_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warn about very large content but still process it
|
// For word count, only block truly extreme cases
|
||||||
if (content.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_SIZE) {
|
|
||||||
console.info(`Large content for note ${noteId || 'unknown'}: ${content.length} bytes - processing may take time but will attempt full search`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// For word count, be even more permissive - only block truly extreme cases
|
|
||||||
const wordCount = content.split(/\s+/).length;
|
const wordCount = content.split(/\s+/).length;
|
||||||
if (wordCount > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT) {
|
if (wordCount > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT) {
|
||||||
console.error(`Word count exceeds absolute system limit for note ${noteId || 'unknown'}: ${wordCount} words - this could cause system instability`);
|
console.error(`Word count exceeds absolute system limit for note ${noteId || 'unknown'}: ${wordCount} words - this could cause system instability`);
|
||||||
@@ -235,15 +229,8 @@ export function validateAndPreprocessContent(content: string, noteId?: string):
|
|||||||
return content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT).join(' ');
|
return content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT).join(' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warn about high word counts but still process them
|
// Notes above LARGE_NOTE_SIZE_THRESHOLD (250KB) will use optimized search strategy
|
||||||
if (wordCount > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
|
// (handled in note_content_fulltext.ts)
|
||||||
console.info(`High word count for note ${noteId || 'unknown'}: ${wordCount} words - phrase matching may take time but will attempt full search`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Progressive processing warning for very large content
|
|
||||||
if (content.length > FUZZY_SEARCH_CONFIG.PROGRESSIVE_PROCESSING_SIZE || wordCount > FUZZY_SEARCH_CONFIG.PROGRESSIVE_PROCESSING_WORDS) {
|
|
||||||
console.info(`Very large content for note ${noteId || 'unknown'} - using progressive processing to maintain responsiveness`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user