mirror of
				https://github.com/zadam/trilium.git
				synced 2025-10-31 18:36:30 +01:00 
			
		
		
		
	feat(llm): remove everything to do with embeddings
This commit is contained in:
		| @@ -23,4 +23,4 @@ export interface EntityChange { | |||||||
|     instanceId?: string | null; |     instanceId?: string | null; | ||||||
| } | } | ||||||
|  |  | ||||||
| export type EntityType = "notes" | "branches" | "attributes" | "note_reordering" | "revisions" | "options" | "attachments" | "blobs" | "etapi_tokens" | "note_embeddings"; | export type EntityType = "notes" | "branches" | "attributes" | "note_reordering" | "revisions" | "options" | "attachments" | "blobs" | "etapi_tokens"; | ||||||
|   | |||||||
| @@ -35,7 +35,7 @@ async function processEntityChanges(entityChanges: EntityChange[]) { | |||||||
|                 loadResults.addOption(attributeEntity.name); |                 loadResults.addOption(attributeEntity.name); | ||||||
|             } else if (ec.entityName === "attachments") { |             } else if (ec.entityName === "attachments") { | ||||||
|                 processAttachment(loadResults, ec); |                 processAttachment(loadResults, ec); | ||||||
|             } else if (ec.entityName === "blobs" || ec.entityName === "etapi_tokens" || ec.entityName === "note_embeddings") { |             } else if (ec.entityName === "blobs" || ec.entityName === "etapi_tokens") { | ||||||
|                 // NOOP - these entities are handled at the backend level and don't require frontend processing |                 // NOOP - these entities are handled at the backend level and don't require frontend processing | ||||||
|             } else { |             } else { | ||||||
|                 throw new Error(`Unknown entityName '${ec.entityName}'`); |                 throw new Error(`Unknown entityName '${ec.entityName}'`); | ||||||
|   | |||||||
| @@ -64,7 +64,6 @@ type EntityRowMappings = { | |||||||
|     options: OptionRow; |     options: OptionRow; | ||||||
|     revisions: RevisionRow; |     revisions: RevisionRow; | ||||||
|     note_reordering: NoteReorderingRow; |     note_reordering: NoteReorderingRow; | ||||||
|     note_embeddings: NoteEmbeddingRow; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| export type EntityRowNames = keyof EntityRowMappings; | export type EntityRowNames = keyof EntityRowMappings; | ||||||
|   | |||||||
| @@ -1124,10 +1124,8 @@ | |||||||
|     "layout-horizontal-description": "launcher bar is underneath the tab bar, the tab bar is now full width." |     "layout-horizontal-description": "launcher bar is underneath the tab bar, the tab bar is now full width." | ||||||
|   }, |   }, | ||||||
|   "ai_llm": { |   "ai_llm": { | ||||||
|     "embeddings_configuration": "Embeddings Configuration", |  | ||||||
|     "not_started": "Not started", |     "not_started": "Not started", | ||||||
|     "title": "AI & Embedding Settings", |     "title": "AI Settings", | ||||||
|     "embedding_statistics": "Embedding Statistics", |  | ||||||
|     "processed_notes": "Processed Notes", |     "processed_notes": "Processed Notes", | ||||||
|     "total_notes": "Total Notes", |     "total_notes": "Total Notes", | ||||||
|     "progress": "Progress", |     "progress": "Progress", | ||||||
| @@ -1135,7 +1133,6 @@ | |||||||
|     "failed_notes": "Failed Notes", |     "failed_notes": "Failed Notes", | ||||||
|     "last_processed": "Last Processed", |     "last_processed": "Last Processed", | ||||||
|     "refresh_stats": "Refresh Statistics", |     "refresh_stats": "Refresh Statistics", | ||||||
|     "no_failed_embeddings": "No failed embeddings found.", |  | ||||||
|     "enable_ai_features": "Enable AI/LLM features", |     "enable_ai_features": "Enable AI/LLM features", | ||||||
|     "enable_ai_description": "Enable AI features like note summarization, content generation, and other LLM capabilities", |     "enable_ai_description": "Enable AI features like note summarization, content generation, and other LLM capabilities", | ||||||
|     "openai_tab": "OpenAI", |     "openai_tab": "OpenAI", | ||||||
| @@ -1160,20 +1157,16 @@ | |||||||
|     "anthropic_api_key_description": "Your Anthropic API key for accessing Claude models", |     "anthropic_api_key_description": "Your Anthropic API key for accessing Claude models", | ||||||
|     "default_model": "Default Model", |     "default_model": "Default Model", | ||||||
|     "openai_model_description": "Examples: gpt-4o, gpt-4-turbo, gpt-3.5-turbo", |     "openai_model_description": "Examples: gpt-4o, gpt-4-turbo, gpt-3.5-turbo", | ||||||
|     "embedding_model": "Embedding Model", |  | ||||||
|     "openai_embedding_model_description": "Model used for generating embeddings (text-embedding-3-small recommended)", |  | ||||||
|     "base_url": "Base URL", |     "base_url": "Base URL", | ||||||
|     "openai_url_description": "Default: https://api.openai.com/v1", |     "openai_url_description": "Default: https://api.openai.com/v1", | ||||||
|     "anthropic_settings": "Anthropic Settings", |     "anthropic_settings": "Anthropic Settings", | ||||||
|     "anthropic_url_description": "Base URL for the Anthropic API (default: https://api.anthropic.com)", |     "anthropic_url_description": "Base URL for the Anthropic API (default: https://api.anthropic.com)", | ||||||
|     "anthropic_model_description": "Anthropic Claude models for chat completion", |     "anthropic_model_description": "Anthropic Claude models for chat completion", | ||||||
|     "voyage_settings": "Voyage AI Settings", |     "voyage_settings": "Voyage AI Settings", | ||||||
|     "voyage_api_key_description": "Your Voyage AI API key for accessing embeddings services", |  | ||||||
|     "ollama_settings": "Ollama Settings", |     "ollama_settings": "Ollama Settings", | ||||||
|     "ollama_url_description": "URL for the Ollama API (default: http://localhost:11434)", |     "ollama_url_description": "URL for the Ollama API (default: http://localhost:11434)", | ||||||
|     "ollama_model_description": "Ollama model to use for chat completion", |     "ollama_model_description": "Ollama model to use for chat completion", | ||||||
|     "anthropic_configuration": "Anthropic Configuration", |     "anthropic_configuration": "Anthropic Configuration", | ||||||
|     "voyage_embedding_model_description": "Voyage AI embedding models for text embeddings (voyage-2 recommended)", |  | ||||||
|     "voyage_configuration": "Voyage AI Configuration", |     "voyage_configuration": "Voyage AI Configuration", | ||||||
|     "voyage_url_description": "Default: https://api.voyageai.com/v1", |     "voyage_url_description": "Default: https://api.voyageai.com/v1", | ||||||
|     "ollama_configuration": "Ollama Configuration", |     "ollama_configuration": "Ollama Configuration", | ||||||
| @@ -1181,28 +1174,10 @@ | |||||||
|     "enable_ollama_description": "Enable Ollama for local AI model usage", |     "enable_ollama_description": "Enable Ollama for local AI model usage", | ||||||
|     "ollama_url": "Ollama URL", |     "ollama_url": "Ollama URL", | ||||||
|     "ollama_model": "Ollama Model", |     "ollama_model": "Ollama Model", | ||||||
|     "ollama_embedding_model": "Embedding Model", |  | ||||||
|     "ollama_embedding_model_description": "Specialized model for generating embeddings (vector representations)", |  | ||||||
|     "refresh_models": "Refresh Models", |     "refresh_models": "Refresh Models", | ||||||
|     "refreshing_models": "Refreshing...", |     "refreshing_models": "Refreshing...", | ||||||
|     "embedding_configuration": "Embeddings Configuration", |  | ||||||
|     "embedding_default_provider": "Default Provider", |  | ||||||
|     "embedding_default_provider_description": "Select the default provider used for generating note embeddings", |  | ||||||
|     "embedding_provider_precedence": "Embedding Provider Precedence", |  | ||||||
|     "embedding_providers_order": "Embedding Provider Order", |  | ||||||
|     "embedding_providers_order_description": "Set the order of embedding providers in comma-separated format (e.g., \"openai,voyage,ollama,local\")", |  | ||||||
|     "enable_automatic_indexing": "Enable Automatic Indexing", |     "enable_automatic_indexing": "Enable Automatic Indexing", | ||||||
|     "enable_automatic_indexing_description": "Automatically generate embeddings for new and updated notes", |  | ||||||
|     "embedding_auto_update_enabled": "Auto-update Embeddings", |  | ||||||
|     "embedding_auto_update_enabled_description": "Automatically update embeddings when notes are modified", |  | ||||||
|     "recreate_embeddings": "Recreate All Embeddings", |  | ||||||
|     "recreate_embeddings_description": "Regenerate all note embeddings from scratch (may take a long time for large note collections)", |  | ||||||
|     "recreate_embeddings_started": "Embeddings regeneration started. This may take a long time for large note collections.", |  | ||||||
|     "recreate_embeddings_error": "Error starting embeddings regeneration. Check logs for details.", |  | ||||||
|     "recreate_embeddings_confirm": "Are you sure you want to recreate all embeddings? This may take a long time for large note collections.", |  | ||||||
|     "rebuild_index": "Rebuild Index", |     "rebuild_index": "Rebuild Index", | ||||||
|     "rebuild_index_description": "Rebuild the vector search index for better performance (much faster than recreating embeddings)", |  | ||||||
|     "rebuild_index_started": "Embedding index rebuild started. This may take several minutes.", |  | ||||||
|     "rebuild_index_error": "Error starting index rebuild. Check logs for details.", |     "rebuild_index_error": "Error starting index rebuild. Check logs for details.", | ||||||
|     "note_title": "Note Title", |     "note_title": "Note Title", | ||||||
|     "error": "Error", |     "error": "Error", | ||||||
| @@ -1212,43 +1187,16 @@ | |||||||
|     "partial": "{{ percentage }}% completed", |     "partial": "{{ percentage }}% completed", | ||||||
|     "retry_queued": "Note queued for retry", |     "retry_queued": "Note queued for retry", | ||||||
|     "retry_failed": "Failed to queue note for retry", |     "retry_failed": "Failed to queue note for retry", | ||||||
|     "embedding_provider_precedence_description": "Comma-separated list of providers in order of precedence for embeddings search (e.g., 'openai,ollama,anthropic')", |  | ||||||
|     "embedding_dimension_strategy": "Embedding Dimension Strategy", |  | ||||||
|     "embedding_dimension_auto": "Auto (Recommended)", |  | ||||||
|     "embedding_dimension_fixed": "Fixed", |  | ||||||
|     "embedding_similarity_threshold": "Similarity Threshold", |  | ||||||
|     "embedding_similarity_threshold_description": "Minimum similarity score for notes to be included in search results (0-1)", |  | ||||||
|     "max_notes_per_llm_query": "Max Notes Per Query", |     "max_notes_per_llm_query": "Max Notes Per Query", | ||||||
|     "max_notes_per_llm_query_description": "Maximum number of similar notes to include in AI context", |     "max_notes_per_llm_query_description": "Maximum number of similar notes to include in AI context", | ||||||
|     "embedding_dimension_strategy_description": "Choose how embeddings are handled. 'Native' preserves maximum information by adapting smaller vectors to match larger ones (recommended). 'Regenerate' creates new embeddings with the target model for specific search needs.", |  | ||||||
|     "drag_providers_to_reorder": "Drag providers up or down to set your preferred order for embedding searches", |  | ||||||
|     "active_providers": "Active Providers", |     "active_providers": "Active Providers", | ||||||
|     "disabled_providers": "Disabled Providers", |     "disabled_providers": "Disabled Providers", | ||||||
|     "remove_provider": "Remove provider from search", |     "remove_provider": "Remove provider from search", | ||||||
|     "restore_provider": "Restore provider to search", |     "restore_provider": "Restore provider to search", | ||||||
|     "embedding_generation_location": "Generation Location", |  | ||||||
|     "embedding_generation_location_description": "Select where embedding generation should happen", |  | ||||||
|     "embedding_generation_location_client": "Client/Server", |  | ||||||
|     "embedding_generation_location_sync_server": "Sync Server", |  | ||||||
|     "enable_auto_update_embeddings": "Auto-update Embeddings", |  | ||||||
|     "enable_auto_update_embeddings_description": "Automatically update embeddings when notes are modified", |  | ||||||
|     "auto_update_embeddings": "Auto-update Embeddings", |  | ||||||
|     "auto_update_embeddings_desc": "Automatically update embeddings when notes are modified", |  | ||||||
|     "similarity_threshold": "Similarity Threshold", |     "similarity_threshold": "Similarity Threshold", | ||||||
|     "similarity_threshold_description": "Minimum similarity score (0-1) for notes to be included in context for LLM queries", |     "similarity_threshold_description": "Minimum similarity score (0-1) for notes to be included in context for LLM queries", | ||||||
|     "embedding_batch_size": "Batch Size", |  | ||||||
|     "embedding_batch_size_description": "Number of notes to process in a single batch (1-50)", |  | ||||||
|     "embedding_update_interval": "Update Interval (ms)", |  | ||||||
|     "embedding_update_interval_description": "Time between processing batches of embeddings (in milliseconds)", |  | ||||||
|     "embedding_default_dimension": "Default Dimension", |  | ||||||
|     "embedding_default_dimension_description": "Default embedding vector dimension when creating new embeddings", |  | ||||||
|     "reprocess_all_embeddings": "Reprocess All Embeddings", |  | ||||||
|     "reprocess_all_embeddings_description": "Queue all notes for embedding processing. This may take some time depending on your number of notes.", |  | ||||||
|     "reprocessing_embeddings": "Reprocessing...", |  | ||||||
|     "reprocess_started": "Embedding reprocessing started in the background", |     "reprocess_started": "Embedding reprocessing started in the background", | ||||||
|     "reprocess_error": "Error starting embedding reprocessing", |  | ||||||
|     "reprocess_index": "Rebuild Search Index", |     "reprocess_index": "Rebuild Search Index", | ||||||
|     "reprocess_index_description": "Optimize the search index for better performance. This uses existing embeddings without regenerating them (much faster than reprocessing all embeddings).", |  | ||||||
|     "reprocessing_index": "Rebuilding...", |     "reprocessing_index": "Rebuilding...", | ||||||
|     "reprocess_index_started": "Search index optimization started in the background", |     "reprocess_index_started": "Search index optimization started in the background", | ||||||
|     "reprocess_index_error": "Error rebuilding search index", |     "reprocess_index_error": "Error rebuilding search index", | ||||||
| @@ -1261,7 +1209,6 @@ | |||||||
|     "incomplete": "Incomplete ({{percentage}}%)", |     "incomplete": "Incomplete ({{percentage}}%)", | ||||||
|     "complete": "Complete (100%)", |     "complete": "Complete (100%)", | ||||||
|     "refreshing": "Refreshing...", |     "refreshing": "Refreshing...", | ||||||
|     "stats_error": "Error fetching embedding statistics", |  | ||||||
|     "auto_refresh_notice": "Auto-refreshes every {{seconds}} seconds", |     "auto_refresh_notice": "Auto-refreshes every {{seconds}} seconds", | ||||||
|     "note_queued_for_retry": "Note queued for retry", |     "note_queued_for_retry": "Note queued for retry", | ||||||
|     "failed_to_retry_note": "Failed to retry note", |     "failed_to_retry_note": "Failed to retry note", | ||||||
| @@ -1269,7 +1216,6 @@ | |||||||
|     "failed_to_retry_all": "Failed to retry notes", |     "failed_to_retry_all": "Failed to retry notes", | ||||||
|     "ai_settings": "AI Settings", |     "ai_settings": "AI Settings", | ||||||
|     "api_key_tooltip": "API key for accessing the service", |     "api_key_tooltip": "API key for accessing the service", | ||||||
|     "confirm_delete_embeddings": "Are you sure you want to delete all AI embeddings? This will remove all semantic search capabilities until notes are reindexed, which can take a significant amount of time.", |  | ||||||
|     "empty_key_warning": { |     "empty_key_warning": { | ||||||
|       "anthropic": "Anthropic API key is empty. Please enter a valid API key.", |       "anthropic": "Anthropic API key is empty. Please enter a valid API key.", | ||||||
|       "openai": "OpenAI API key is empty. Please enter a valid API key.", |       "openai": "OpenAI API key is empty. Please enter a valid API key.", | ||||||
| @@ -1302,7 +1248,6 @@ | |||||||
|     "note_chat": "Note Chat", |     "note_chat": "Note Chat", | ||||||
|     "notes_indexed": "{{ count }} note indexed", |     "notes_indexed": "{{ count }} note indexed", | ||||||
|     "notes_indexed_plural": "{{ count }} notes indexed", |     "notes_indexed_plural": "{{ count }} notes indexed", | ||||||
|     "reset_embeddings": "Reset Embeddings", |  | ||||||
|     "sources": "Sources", |     "sources": "Sources", | ||||||
|     "start_indexing": "Start Indexing", |     "start_indexing": "Start Indexing", | ||||||
|     "use_advanced_context": "Use Advanced Context", |     "use_advanced_context": "Use Advanced Context", | ||||||
| @@ -1315,24 +1260,10 @@ | |||||||
|     }, |     }, | ||||||
|     "create_new_ai_chat": "Create new AI Chat", |     "create_new_ai_chat": "Create new AI Chat", | ||||||
|     "configuration_warnings": "There are some issues with your AI configuration. Please check your settings.", |     "configuration_warnings": "There are some issues with your AI configuration. Please check your settings.", | ||||||
|     "embeddings_started": "Embedding generation started", |  | ||||||
|     "embeddings_stopped": "Embedding generation stopped", |  | ||||||
|     "embeddings_toggle_error": "Error toggling embeddings", |  | ||||||
|     "local_embedding_description": "Uses local embedding models for offline text embedding generation", |  | ||||||
|     "local_embedding_settings": "Local Embedding Settings", |  | ||||||
|     "ollama_embedding_settings": "Ollama Embedding Settings", |  | ||||||
|     "ollama_embedding_url_description": "URL for the Ollama API for embedding generation (default: http://localhost:11434)", |  | ||||||
|     "openai_embedding_api_key_description": "Your OpenAI API key for embedding generation (can be different from chat API key)", |  | ||||||
|     "openai_embedding_settings": "OpenAI Embedding Settings", |  | ||||||
|     "openai_embedding_url_description": "Base URL for OpenAI embedding API (default: https://api.openai.com/v1)", |  | ||||||
|     "selected_embedding_provider": "Selected Embedding Provider", |  | ||||||
|     "selected_embedding_provider_description": "Choose the provider for generating note embeddings", |  | ||||||
|     "selected_provider": "Selected Provider", |     "selected_provider": "Selected Provider", | ||||||
|     "selected_provider_description": "Choose the AI provider for chat and completion features", |     "selected_provider_description": "Choose the AI provider for chat and completion features", | ||||||
|     "select_embedding_provider": "Select embedding provider...", |  | ||||||
|     "select_model": "Select model...", |     "select_model": "Select model...", | ||||||
|     "select_provider": "Select provider...", |     "select_provider": "Select provider..." | ||||||
|     "voyage_embedding_url_description": "Base URL for the Voyage AI embedding API (default: https://api.voyageai.com/v1)" |  | ||||||
|   }, |   }, | ||||||
|   "zoom_factor": { |   "zoom_factor": { | ||||||
|     "title": "Zoom Factor (desktop build only)", |     "title": "Zoom Factor (desktop build only)", | ||||||
|   | |||||||
| @@ -258,9 +258,3 @@ export async function getDirectResponse(noteId: string, messageParams: any): Pro | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get embedding statistics |  | ||||||
|  */ |  | ||||||
| export async function getEmbeddingStats(): Promise<any> { |  | ||||||
|     return server.get('llm/embeddings/stats'); |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -2,12 +2,11 @@ | |||||||
|  * Validation functions for LLM Chat |  * Validation functions for LLM Chat | ||||||
|  */ |  */ | ||||||
| import options from "../../services/options.js"; | import options from "../../services/options.js"; | ||||||
| import { getEmbeddingStats } from "./communication.js"; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Validate embedding providers configuration |  * Validate providers configuration | ||||||
|  */ |  */ | ||||||
| export async function validateEmbeddingProviders(validationWarning: HTMLElement): Promise<void> { | export async function validateProviders(validationWarning: HTMLElement): Promise<void> { | ||||||
|     try { |     try { | ||||||
|         // Check if AI is enabled |         // Check if AI is enabled | ||||||
|         const aiEnabled = options.is('aiEnabled'); |         const aiEnabled = options.is('aiEnabled'); | ||||||
| @@ -62,23 +61,8 @@ export async function validateEmbeddingProviders(validationWarning: HTMLElement) | |||||||
|             // Add checks for other providers as needed |             // Add checks for other providers as needed | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Fetch embedding stats to check if there are any notes being processed |         // Show warning if there are configuration issues | ||||||
|         const embeddingStats = await getEmbeddingStats() as { |         if (configIssues.length > 0) { | ||||||
|             success: boolean, |  | ||||||
|             stats: { |  | ||||||
|                 totalNotesCount: number; |  | ||||||
|                 embeddedNotesCount: number; |  | ||||||
|                 queuedNotesCount: number; |  | ||||||
|                 failedNotesCount: number; |  | ||||||
|                 lastProcessedDate: string | null; |  | ||||||
|                 percentComplete: number; |  | ||||||
|             } |  | ||||||
|         }; |  | ||||||
|         const queuedNotes = embeddingStats?.stats?.queuedNotesCount || 0; |  | ||||||
|         const hasEmbeddingsInQueue = queuedNotes > 0; |  | ||||||
|  |  | ||||||
|         // Show warning if there are configuration issues or embeddings in queue |  | ||||||
|         if (configIssues.length > 0 || hasEmbeddingsInQueue) { |  | ||||||
|             let message = '<i class="bx bx-error-circle me-2"></i><strong>AI Provider Configuration Issues</strong>'; |             let message = '<i class="bx bx-error-circle me-2"></i><strong>AI Provider Configuration Issues</strong>'; | ||||||
|  |  | ||||||
|             message += '<ul class="mb-1 ps-4">'; |             message += '<ul class="mb-1 ps-4">'; | ||||||
| @@ -87,11 +71,6 @@ export async function validateEmbeddingProviders(validationWarning: HTMLElement) | |||||||
|             for (const issue of configIssues) { |             for (const issue of configIssues) { | ||||||
|                 message += `<li>${issue}</li>`; |                 message += `<li>${issue}</li>`; | ||||||
|             } |             } | ||||||
|              |  | ||||||
|             // Show warning about embeddings queue if applicable |  | ||||||
|             if (hasEmbeddingsInQueue) { |  | ||||||
|                 message += `<li>Currently processing embeddings for ${queuedNotes} notes. Some AI features may produce incomplete results until processing completes.</li>`; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             message += '</ul>'; |             message += '</ul>'; | ||||||
|             message += '<div class="mt-2"><a href="javascript:" class="settings-link btn btn-sm btn-outline-secondary"><i class="bx bx-cog me-1"></i>Open AI Settings</a></div>'; |             message += '<div class="mt-2"><a href="javascript:" class="settings-link btn btn-sm btn-outline-secondary"><i class="bx bx-cog me-1"></i>Open AI Settings</a></div>'; | ||||||
| @@ -103,7 +82,7 @@ export async function validateEmbeddingProviders(validationWarning: HTMLElement) | |||||||
|             validationWarning.style.display = 'none'; |             validationWarning.style.display = 'none'; | ||||||
|         } |         } | ||||||
|     } catch (error) { |     } catch (error) { | ||||||
|         console.error('Error validating embedding providers:', error); |         console.error('Error validating providers:', error); | ||||||
|         validationWarning.style.display = 'none'; |         validationWarning.style.display = 'none'; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -4,16 +4,12 @@ import { t } from "../../../../services/i18n.js"; | |||||||
| import type { OptionDefinitions, OptionMap } from "@triliumnext/commons"; | import type { OptionDefinitions, OptionMap } from "@triliumnext/commons"; | ||||||
| import server from "../../../../services/server.js"; | import server from "../../../../services/server.js"; | ||||||
| import toastService from "../../../../services/toast.js"; | import toastService from "../../../../services/toast.js"; | ||||||
| import type { EmbeddingStats, FailedEmbeddingNotes } from "./interfaces.js"; |  | ||||||
| import { ProviderService } from "./providers.js"; | import { ProviderService } from "./providers.js"; | ||||||
|  |  | ||||||
| export default class AiSettingsWidget extends OptionsWidget { | export default class AiSettingsWidget extends OptionsWidget { | ||||||
|     private ollamaModelsRefreshed = false; |     private ollamaModelsRefreshed = false; | ||||||
|     private openaiModelsRefreshed = false; |     private openaiModelsRefreshed = false; | ||||||
|     private anthropicModelsRefreshed = false; |     private anthropicModelsRefreshed = false; | ||||||
|     private statsRefreshInterval: NodeJS.Timeout | null = null; |  | ||||||
|     private indexRebuildRefreshInterval: NodeJS.Timeout | null = null; |  | ||||||
|     private readonly STATS_REFRESH_INTERVAL = 5000; // 5 seconds |  | ||||||
|     private providerService: ProviderService | null = null; |     private providerService: ProviderService | null = null; | ||||||
|  |  | ||||||
|     doRender() { |     doRender() { | ||||||
| @@ -23,9 +19,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|         // Setup event handlers for options |         // Setup event handlers for options | ||||||
|         this.setupEventHandlers(); |         this.setupEventHandlers(); | ||||||
|  |  | ||||||
|         this.refreshEmbeddingStats(); |  | ||||||
|         this.fetchFailedEmbeddingNotes(); |  | ||||||
|  |  | ||||||
|         return this.$widget; |         return this.$widget; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -57,26 +50,13 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|                     const isEnabled = value === 'true'; |                     const isEnabled = value === 'true'; | ||||||
|                      |                      | ||||||
|                     if (isEnabled) { |                     if (isEnabled) { | ||||||
|                         // Start embedding generation |                         toastService.showMessage(t("ai_llm.ai_enabled") || "AI features enabled"); | ||||||
|                         await server.post('llm/embeddings/start'); |  | ||||||
|                         toastService.showMessage(t("ai_llm.embeddings_started") || "Embedding generation started"); |  | ||||||
|                          |  | ||||||
|                         // Start polling for stats updates |  | ||||||
|                         this.refreshEmbeddingStats(); |  | ||||||
|                     } else { |                     } else { | ||||||
|                         // Stop embedding generation |                         toastService.showMessage(t("ai_llm.ai_disabled") || "AI features disabled"); | ||||||
|                         await server.post('llm/embeddings/stop'); |  | ||||||
|                         toastService.showMessage(t("ai_llm.embeddings_stopped") || "Embedding generation stopped"); |  | ||||||
|                          |  | ||||||
|                         // Clear any active polling intervals |  | ||||||
|                         if (this.indexRebuildRefreshInterval) { |  | ||||||
|                             clearInterval(this.indexRebuildRefreshInterval); |  | ||||||
|                             this.indexRebuildRefreshInterval = null; |  | ||||||
|                         } |  | ||||||
|                     } |                     } | ||||||
|                 } catch (error) { |                 } catch (error) { | ||||||
|                     console.error('Error toggling embeddings:', error); |                     console.error('Error toggling AI:', error); | ||||||
|                     toastService.showError(t("ai_llm.embeddings_toggle_error") || "Error toggling embeddings"); |                     toastService.showError(t("ai_llm.ai_toggle_error") || "Error toggling AI features"); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
| @@ -102,7 +82,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|         this.setupChangeHandler('.openai-api-key', 'openaiApiKey', true); |         this.setupChangeHandler('.openai-api-key', 'openaiApiKey', true); | ||||||
|         this.setupChangeHandler('.openai-base-url', 'openaiBaseUrl', true); |         this.setupChangeHandler('.openai-base-url', 'openaiBaseUrl', true); | ||||||
|         this.setupChangeHandler('.openai-default-model', 'openaiDefaultModel'); |         this.setupChangeHandler('.openai-default-model', 'openaiDefaultModel'); | ||||||
|         this.setupChangeHandler('.openai-embedding-model', 'openaiEmbeddingModel'); |  | ||||||
|  |  | ||||||
|         // Anthropic options |         // Anthropic options | ||||||
|         this.setupChangeHandler('.anthropic-api-key', 'anthropicApiKey', true); |         this.setupChangeHandler('.anthropic-api-key', 'anthropicApiKey', true); | ||||||
| @@ -111,18 +90,10 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|  |  | ||||||
|         // Voyage options |         // Voyage options | ||||||
|         this.setupChangeHandler('.voyage-api-key', 'voyageApiKey'); |         this.setupChangeHandler('.voyage-api-key', 'voyageApiKey'); | ||||||
|         this.setupChangeHandler('.voyage-embedding-model', 'voyageEmbeddingModel'); |  | ||||||
|         this.setupChangeHandler('.voyage-embedding-base-url', 'voyageEmbeddingBaseUrl'); |  | ||||||
|  |  | ||||||
|         // Ollama options |         // Ollama options | ||||||
|         this.setupChangeHandler('.ollama-base-url', 'ollamaBaseUrl'); |         this.setupChangeHandler('.ollama-base-url', 'ollamaBaseUrl'); | ||||||
|         this.setupChangeHandler('.ollama-default-model', 'ollamaDefaultModel'); |         this.setupChangeHandler('.ollama-default-model', 'ollamaDefaultModel'); | ||||||
|         this.setupChangeHandler('.ollama-embedding-model', 'ollamaEmbeddingModel'); |  | ||||||
|         this.setupChangeHandler('.ollama-embedding-base-url', 'ollamaEmbeddingBaseUrl'); |  | ||||||
|  |  | ||||||
|         // Embedding-specific provider options |  | ||||||
|         this.setupChangeHandler('.openai-embedding-api-key', 'openaiEmbeddingApiKey', true); |  | ||||||
|         this.setupChangeHandler('.openai-embedding-base-url', 'openaiEmbeddingBaseUrl', true); |  | ||||||
|  |  | ||||||
|         const $refreshModels = this.$widget.find('.refresh-models'); |         const $refreshModels = this.$widget.find('.refresh-models'); | ||||||
|         $refreshModels.on('click', async () => { |         $refreshModels.on('click', async () => { | ||||||
| @@ -162,15 +133,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|             this.anthropicModelsRefreshed = await this.providerService?.refreshAnthropicModels(false, this.anthropicModelsRefreshed) || false; |             this.anthropicModelsRefreshed = await this.providerService?.refreshAnthropicModels(false, this.anthropicModelsRefreshed) || false; | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         // Embedding options event handlers |  | ||||||
|         this.setupChangeHandler('.embedding-auto-update-enabled', 'embeddingAutoUpdateEnabled', false, true); |  | ||||||
|         this.setupChangeHandler('.enable-automatic-indexing', 'enableAutomaticIndexing', false, true); |  | ||||||
|         this.setupChangeHandler('.embedding-similarity-threshold', 'embeddingSimilarityThreshold'); |  | ||||||
|         this.setupChangeHandler('.max-notes-per-llm-query', 'maxNotesPerLlmQuery'); |  | ||||||
|         this.setupChangeHandler('.embedding-selected-provider', 'embeddingSelectedProvider', true); |  | ||||||
|         this.setupChangeHandler('.embedding-dimension-strategy', 'embeddingDimensionStrategy'); |  | ||||||
|         this.setupChangeHandler('.embedding-batch-size', 'embeddingBatchSize'); |  | ||||||
|         this.setupChangeHandler('.embedding-update-interval', 'embeddingUpdateInterval'); |  | ||||||
|  |  | ||||||
|         // Add provider selection change handlers for dynamic settings visibility |         // Add provider selection change handlers for dynamic settings visibility | ||||||
|         this.$widget.find('.ai-selected-provider').on('change', async () => { |         this.$widget.find('.ai-selected-provider').on('change', async () => { | ||||||
| @@ -183,26 +145,13 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|             } |             } | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         this.$widget.find('.embedding-selected-provider').on('change', async () => { |  | ||||||
|             const selectedProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             this.$widget.find('.embedding-provider-settings').hide(); |  | ||||||
|             if (selectedProvider) { |  | ||||||
|                 this.$widget.find(`.${selectedProvider}-embedding-provider-settings`).show(); |  | ||||||
|                 // Automatically fetch embedding models for the newly selected provider |  | ||||||
|                 await this.fetchModelsForProvider(selectedProvider, 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Add base URL change handlers to trigger model fetching |         // Add base URL change handlers to trigger model fetching | ||||||
|         this.$widget.find('.openai-base-url').on('change', async () => { |         this.$widget.find('.openai-base-url').on('change', async () => { | ||||||
|             const selectedProvider = this.$widget.find('.ai-selected-provider').val() as string; |             const selectedProvider = this.$widget.find('.ai-selected-provider').val() as string; | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedProvider === 'openai') { |             if (selectedProvider === 'openai') { | ||||||
|                 await this.fetchModelsForProvider('openai', 'chat'); |                 await this.fetchModelsForProvider('openai', 'chat'); | ||||||
|             } |             } | ||||||
|             if (selectedEmbeddingProvider === 'openai') { |  | ||||||
|                 await this.fetchModelsForProvider('openai', 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         this.$widget.find('.anthropic-base-url').on('change', async () => { |         this.$widget.find('.anthropic-base-url').on('change', async () => { | ||||||
| @@ -214,25 +163,17 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|  |  | ||||||
|         this.$widget.find('.ollama-base-url').on('change', async () => { |         this.$widget.find('.ollama-base-url').on('change', async () => { | ||||||
|             const selectedProvider = this.$widget.find('.ai-selected-provider').val() as string; |             const selectedProvider = this.$widget.find('.ai-selected-provider').val() as string; | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedProvider === 'ollama') { |             if (selectedProvider === 'ollama') { | ||||||
|                 await this.fetchModelsForProvider('ollama', 'chat'); |                 await this.fetchModelsForProvider('ollama', 'chat'); | ||||||
|             } |             } | ||||||
|             if (selectedEmbeddingProvider === 'ollama') { |  | ||||||
|                 await this.fetchModelsForProvider('ollama', 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         // Add API key change handlers to trigger model fetching |         // Add API key change handlers to trigger model fetching | ||||||
|         this.$widget.find('.openai-api-key').on('change', async () => { |         this.$widget.find('.openai-api-key').on('change', async () => { | ||||||
|             const selectedProvider = this.$widget.find('.ai-selected-provider').val() as string; |             const selectedProvider = this.$widget.find('.ai-selected-provider').val() as string; | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedProvider === 'openai') { |             if (selectedProvider === 'openai') { | ||||||
|                 await this.fetchModelsForProvider('openai', 'chat'); |                 await this.fetchModelsForProvider('openai', 'chat'); | ||||||
|             } |             } | ||||||
|             if (selectedEmbeddingProvider === 'openai') { |  | ||||||
|                 await this.fetchModelsForProvider('openai', 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         this.$widget.find('.anthropic-api-key').on('change', async () => { |         this.$widget.find('.anthropic-api-key').on('change', async () => { | ||||||
| @@ -242,85 +183,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|             } |             } | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         this.$widget.find('.voyage-api-key').on('change', async () => { |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedEmbeddingProvider === 'voyage') { |  | ||||||
|                 // Voyage doesn't have dynamic model fetching yet, but we can add it here when implemented |  | ||||||
|                 console.log('Voyage API key changed - model fetching not yet implemented'); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Add embedding base URL change handlers to trigger model fetching |  | ||||||
|         this.$widget.find('.openai-embedding-base-url').on('change', async () => { |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedEmbeddingProvider === 'openai') { |  | ||||||
|                 await this.fetchModelsForProvider('openai', 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         this.$widget.find('.voyage-embedding-base-url').on('change', async () => { |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedEmbeddingProvider === 'voyage') { |  | ||||||
|                 // Voyage doesn't have dynamic model fetching yet, but we can add it here when implemented |  | ||||||
|                 console.log('Voyage embedding base URL changed - model fetching not yet implemented'); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         this.$widget.find('.ollama-embedding-base-url').on('change', async () => { |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedEmbeddingProvider === 'ollama') { |  | ||||||
|                 await this.fetchModelsForProvider('ollama', 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Add embedding API key change handlers to trigger model fetching |  | ||||||
|         this.$widget.find('.openai-embedding-api-key').on('change', async () => { |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|             if (selectedEmbeddingProvider === 'openai') { |  | ||||||
|                 await this.fetchModelsForProvider('openai', 'embedding'); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // No sortable behavior needed anymore |  | ||||||
|  |  | ||||||
|         // Embedding stats refresh button |  | ||||||
|         const $refreshStats = this.$widget.find('.embedding-refresh-stats'); |  | ||||||
|         $refreshStats.on('click', async () => { |  | ||||||
|             await this.refreshEmbeddingStats(); |  | ||||||
|             await this.fetchFailedEmbeddingNotes(); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Recreate embeddings button |  | ||||||
|         const $recreateEmbeddings = this.$widget.find('.recreate-embeddings'); |  | ||||||
|         $recreateEmbeddings.on('click', async () => { |  | ||||||
|             if (confirm(t("ai_llm.recreate_embeddings_confirm") || "Are you sure you want to recreate all embeddings? This may take a long time.")) { |  | ||||||
|                 try { |  | ||||||
|                     await server.post('llm/embeddings/reprocess'); |  | ||||||
|                     toastService.showMessage(t("ai_llm.recreate_embeddings_started")); |  | ||||||
|  |  | ||||||
|                     // Start progress polling |  | ||||||
|                     this.pollIndexRebuildProgress(); |  | ||||||
|                 } catch (e) { |  | ||||||
|                     console.error('Error starting embeddings regeneration:', e); |  | ||||||
|                     toastService.showError(t("ai_llm.recreate_embeddings_error")); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Rebuild index button |  | ||||||
|         const $rebuildIndex = this.$widget.find('.rebuild-embeddings-index'); |  | ||||||
|         $rebuildIndex.on('click', async () => { |  | ||||||
|             try { |  | ||||||
|                 await server.post('llm/embeddings/rebuild-index'); |  | ||||||
|                 toastService.showMessage(t("ai_llm.rebuild_index_started")); |  | ||||||
|  |  | ||||||
|                 // Start progress polling |  | ||||||
|                 this.pollIndexRebuildProgress(); |  | ||||||
|             } catch (e) { |  | ||||||
|                 console.error('Error starting index rebuild:', e); |  | ||||||
|                 toastService.showError(t("ai_llm.rebuild_index_error")); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
| @@ -360,30 +222,9 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Similar checks for embeddings |  | ||||||
|         const embeddingWarnings: string[] = []; |  | ||||||
|         const embeddingsEnabled = this.$widget.find('.enable-automatic-indexing').prop('checked'); |  | ||||||
|  |  | ||||||
|         if (embeddingsEnabled) { |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|  |  | ||||||
|             if (selectedEmbeddingProvider === 'openai' && !this.$widget.find('.openai-api-key').val()) { |  | ||||||
|                 embeddingWarnings.push(t("ai_llm.empty_key_warning.openai")); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (selectedEmbeddingProvider === 'voyage' && !this.$widget.find('.voyage-api-key').val()) { |  | ||||||
|                 embeddingWarnings.push(t("ai_llm.empty_key_warning.voyage")); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (selectedEmbeddingProvider === 'ollama' && !this.$widget.find('.ollama-embedding-base-url').val()) { |  | ||||||
|                 embeddingWarnings.push(t("ai_llm.empty_key_warning.ollama")); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Combine all warnings |         // Combine all warnings | ||||||
|         const allWarnings = [ |         const allWarnings = [ | ||||||
|             ...providerWarnings, |             ...providerWarnings | ||||||
|             ...embeddingWarnings |  | ||||||
|         ]; |         ]; | ||||||
|  |  | ||||||
|         // Show or hide warnings |         // Show or hide warnings | ||||||
| @@ -396,168 +237,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Poll for index rebuild progress |  | ||||||
|      */ |  | ||||||
|     pollIndexRebuildProgress() { |  | ||||||
|         if (this.indexRebuildRefreshInterval) { |  | ||||||
|             clearInterval(this.indexRebuildRefreshInterval); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Set up polling interval for index rebuild progress |  | ||||||
|         this.indexRebuildRefreshInterval = setInterval(async () => { |  | ||||||
|             await this.refreshEmbeddingStats(); |  | ||||||
|         }, this.STATS_REFRESH_INTERVAL); |  | ||||||
|  |  | ||||||
|         // Stop polling after 5 minutes to avoid indefinite polling |  | ||||||
|         setTimeout(() => { |  | ||||||
|             if (this.indexRebuildRefreshInterval) { |  | ||||||
|                 clearInterval(this.indexRebuildRefreshInterval); |  | ||||||
|                 this.indexRebuildRefreshInterval = null; |  | ||||||
|             } |  | ||||||
|         }, 5 * 60 * 1000); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Refresh embedding statistics |  | ||||||
|      */ |  | ||||||
|     async refreshEmbeddingStats() { |  | ||||||
|         if (!this.$widget) return; |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const response = await server.get<EmbeddingStats>('llm/embeddings/stats'); |  | ||||||
|  |  | ||||||
|             if (response && response.success) { |  | ||||||
|                 const stats = response.stats; |  | ||||||
|  |  | ||||||
|                 // Update stats display |  | ||||||
|                 this.$widget.find('.embedding-processed-notes').text(stats.embeddedNotesCount); |  | ||||||
|                 this.$widget.find('.embedding-total-notes').text(stats.totalNotesCount); |  | ||||||
|                 this.$widget.find('.embedding-queued-notes').text(stats.queuedNotesCount); |  | ||||||
|                 this.$widget.find('.embedding-failed-notes').text(stats.failedNotesCount); |  | ||||||
|  |  | ||||||
|                 if (stats.lastProcessedDate) { |  | ||||||
|                     const date = new Date(stats.lastProcessedDate); |  | ||||||
|                     this.$widget.find('.embedding-last-processed').text(date.toLocaleString()); |  | ||||||
|                 } else { |  | ||||||
|                     this.$widget.find('.embedding-last-processed').text('-'); |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Update progress bar |  | ||||||
|                 const $progressBar = this.$widget.find('.embedding-progress'); |  | ||||||
|                 const progressPercent = stats.percentComplete; |  | ||||||
|                 $progressBar.css('width', `${progressPercent}%`); |  | ||||||
|                 $progressBar.attr('aria-valuenow', progressPercent.toString()); |  | ||||||
|                 $progressBar.text(`${progressPercent}%`); |  | ||||||
|  |  | ||||||
|                 // Update status text |  | ||||||
|                 let statusText; |  | ||||||
|                 if (stats.queuedNotesCount > 0) { |  | ||||||
|                     statusText = t("ai_llm.agent.processing", { percentage: progressPercent }); |  | ||||||
|                 } else if (stats.embeddedNotesCount === 0) { |  | ||||||
|                     statusText = t("ai_llm.not_started"); |  | ||||||
|                 } else if (stats.embeddedNotesCount === stats.totalNotesCount) { |  | ||||||
|                     statusText = t("ai_llm.complete"); |  | ||||||
|  |  | ||||||
|                     // Clear polling interval if processing is complete |  | ||||||
|                     if (this.indexRebuildRefreshInterval) { |  | ||||||
|                         clearInterval(this.indexRebuildRefreshInterval); |  | ||||||
|                         this.indexRebuildRefreshInterval = null; |  | ||||||
|                     } |  | ||||||
|                 } else { |  | ||||||
|                     statusText = t("ai_llm.partial", { percentage: progressPercent }); |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 this.$widget.find('.embedding-status-text').text(statusText); |  | ||||||
|             } |  | ||||||
|         } catch (e) { |  | ||||||
|             console.error('Error fetching embedding stats:', e); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Fetch failed embedding notes |  | ||||||
|      */ |  | ||||||
|     async fetchFailedEmbeddingNotes() { |  | ||||||
|         if (!this.$widget) return; |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const response = await server.get<FailedEmbeddingNotes>('llm/embeddings/failed'); |  | ||||||
|  |  | ||||||
|             if (response && response.success) { |  | ||||||
|                 const failedNotes = response.failedNotes || []; |  | ||||||
|                 const $failedNotesList = this.$widget.find('.embedding-failed-notes-list'); |  | ||||||
|  |  | ||||||
|                 if (failedNotes.length === 0) { |  | ||||||
|                     $failedNotesList.html(`<div class="alert alert-info">${t("ai_llm.no_failed_embeddings")}</div>`); |  | ||||||
|                     return; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Create a table with failed notes |  | ||||||
|                 let html = ` |  | ||||||
|                 <table class="table table-sm table-striped"> |  | ||||||
|                     <thead> |  | ||||||
|                         <tr> |  | ||||||
|                             <th>${t("ai_llm.note_title")}</th> |  | ||||||
|                             <th>${t("ai_llm.error")}</th> |  | ||||||
|                             <th>${t("ai_llm.last_attempt")}</th> |  | ||||||
|                             <th>${t("ai_llm.actions")}</th> |  | ||||||
|                         </tr> |  | ||||||
|                     </thead> |  | ||||||
|                     <tbody> |  | ||||||
|                 `; |  | ||||||
|  |  | ||||||
|                 for (const note of failedNotes) { |  | ||||||
|                     const date = new Date(note.lastAttempt); |  | ||||||
|                     const isPermanent = note.isPermanent; |  | ||||||
|                     const noteTitle = note.title || note.noteId; |  | ||||||
|  |  | ||||||
|                     html += ` |  | ||||||
|                     <tr data-note-id="${note.noteId}"> |  | ||||||
|                         <td><a href="#" class="open-note">${noteTitle}</a></td> |  | ||||||
|                         <td>${note.error}</td> |  | ||||||
|                         <td>${date.toLocaleString()}</td> |  | ||||||
|                         <td> |  | ||||||
|                             <button class="btn btn-sm btn-outline-secondary retry-embedding" ${isPermanent ? 'disabled' : ''}> |  | ||||||
|                                 ${t("ai_llm.retry")} |  | ||||||
|                             </button> |  | ||||||
|                         </td> |  | ||||||
|                     </tr> |  | ||||||
|                     `; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 html += ` |  | ||||||
|                     </tbody> |  | ||||||
|                 </table> |  | ||||||
|                 `; |  | ||||||
|  |  | ||||||
|                 $failedNotesList.html(html); |  | ||||||
|  |  | ||||||
|                 // Add event handlers for retry buttons |  | ||||||
|                 $failedNotesList.find('.retry-embedding').on('click', async function() { |  | ||||||
|                     const noteId = $(this).closest('tr').data('note-id'); |  | ||||||
|                     try { |  | ||||||
|                         await server.post('llm/embeddings/retry', { noteId }); |  | ||||||
|                         toastService.showMessage(t("ai_llm.retry_queued")); |  | ||||||
|                         // Remove this row or update status |  | ||||||
|                         $(this).closest('tr').remove(); |  | ||||||
|                     } catch (e) { |  | ||||||
|                         console.error('Error retrying embedding:', e); |  | ||||||
|                         toastService.showError(t("ai_llm.retry_failed")); |  | ||||||
|                     } |  | ||||||
|                 }); |  | ||||||
|  |  | ||||||
|                 // Add event handlers for open note links |  | ||||||
|                 $failedNotesList.find('.open-note').on('click', function(e) { |  | ||||||
|                     e.preventDefault(); |  | ||||||
|                     const noteId = $(this).closest('tr').data('note-id'); |  | ||||||
|                     window.open(`#${noteId}`, '_blank'); |  | ||||||
|                 }); |  | ||||||
|             } |  | ||||||
|         } catch (e) { |  | ||||||
|             console.error('Error fetching failed embedding notes:', e); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Helper to get display name for providers |      * Helper to get display name for providers | ||||||
| @@ -594,7 +273,7 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|     /** |     /** | ||||||
|      * Fetch models for a specific provider and model type |      * Fetch models for a specific provider and model type | ||||||
|      */ |      */ | ||||||
|     async fetchModelsForProvider(provider: string, modelType: 'chat' | 'embedding') { |     async fetchModelsForProvider(provider: string, modelType: 'chat') { | ||||||
|         if (!this.providerService) return; |         if (!this.providerService) return; | ||||||
|  |  | ||||||
|         try { |         try { | ||||||
| @@ -629,12 +308,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|             this.$widget.find(`.${selectedAiProvider}-provider-settings`).show(); |             this.$widget.find(`.${selectedAiProvider}-provider-settings`).show(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Update embedding provider settings visibility |  | ||||||
|         const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|         this.$widget.find('.embedding-provider-settings').hide(); |  | ||||||
|         if (selectedEmbeddingProvider) { |  | ||||||
|             this.$widget.find(`.${selectedEmbeddingProvider}-embedding-provider-settings`).show(); |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
| @@ -653,7 +326,6 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|         this.$widget.find('.openai-api-key').val(options.openaiApiKey || ''); |         this.$widget.find('.openai-api-key').val(options.openaiApiKey || ''); | ||||||
|         this.$widget.find('.openai-base-url').val(options.openaiBaseUrl || 'https://api.openai.com/v1'); |         this.$widget.find('.openai-base-url').val(options.openaiBaseUrl || 'https://api.openai.com/v1'); | ||||||
|         this.setModelDropdownValue('.openai-default-model', options.openaiDefaultModel); |         this.setModelDropdownValue('.openai-default-model', options.openaiDefaultModel); | ||||||
|         this.setModelDropdownValue('.openai-embedding-model', options.openaiEmbeddingModel); |  | ||||||
|  |  | ||||||
|         // Anthropic Section |         // Anthropic Section | ||||||
|         this.$widget.find('.anthropic-api-key').val(options.anthropicApiKey || ''); |         this.$widget.find('.anthropic-api-key').val(options.anthropicApiKey || ''); | ||||||
| @@ -662,58 +334,26 @@ export default class AiSettingsWidget extends OptionsWidget { | |||||||
|  |  | ||||||
|         // Voyage Section |         // Voyage Section | ||||||
|         this.$widget.find('.voyage-api-key').val(options.voyageApiKey || ''); |         this.$widget.find('.voyage-api-key').val(options.voyageApiKey || ''); | ||||||
|         this.$widget.find('.voyage-embedding-base-url').val(options.voyageEmbeddingBaseUrl || 'https://api.voyageai.com/v1'); |  | ||||||
|         this.setModelDropdownValue('.voyage-embedding-model', options.voyageEmbeddingModel); |  | ||||||
|  |  | ||||||
|         // Ollama Section |         // Ollama Section | ||||||
|         this.$widget.find('.ollama-base-url').val(options.ollamaBaseUrl || 'http://localhost:11434'); |         this.$widget.find('.ollama-base-url').val(options.ollamaBaseUrl || 'http://localhost:11434'); | ||||||
|         this.$widget.find('.ollama-embedding-base-url').val(options.ollamaEmbeddingBaseUrl || 'http://localhost:11434'); |  | ||||||
|         this.setModelDropdownValue('.ollama-default-model', options.ollamaDefaultModel); |         this.setModelDropdownValue('.ollama-default-model', options.ollamaDefaultModel); | ||||||
|         this.setModelDropdownValue('.ollama-embedding-model', options.ollamaEmbeddingModel); |  | ||||||
|  |  | ||||||
|         // Embedding-specific provider options |  | ||||||
|         this.$widget.find('.openai-embedding-api-key').val(options.openaiEmbeddingApiKey || ''); |  | ||||||
|         this.$widget.find('.openai-embedding-base-url').val(options.openaiEmbeddingBaseUrl || 'https://api.openai.com/v1'); |  | ||||||
|  |  | ||||||
|         // Embedding Options |  | ||||||
|         this.$widget.find('.embedding-selected-provider').val(options.embeddingSelectedProvider || 'openai'); |  | ||||||
|         this.$widget.find('.embedding-auto-update-enabled').prop('checked', options.embeddingAutoUpdateEnabled !== 'false'); |  | ||||||
|         this.$widget.find('.enable-automatic-indexing').prop('checked', options.enableAutomaticIndexing !== 'false'); |  | ||||||
|         this.$widget.find('.embedding-similarity-threshold').val(options.embeddingSimilarityThreshold || '0.75'); |  | ||||||
|         this.$widget.find('.max-notes-per-llm-query').val(options.maxNotesPerLlmQuery || '3'); |  | ||||||
|         this.$widget.find('.embedding-dimension-strategy').val(options.embeddingDimensionStrategy || 'auto'); |  | ||||||
|         this.$widget.find('.embedding-batch-size').val(options.embeddingBatchSize || '10'); |  | ||||||
|         this.$widget.find('.embedding-update-interval').val(options.embeddingUpdateInterval || '5000'); |  | ||||||
|  |  | ||||||
|         // Show/hide provider settings based on selected providers |         // Show/hide provider settings based on selected providers | ||||||
|         this.updateProviderSettingsVisibility(); |         this.updateProviderSettingsVisibility(); | ||||||
|  |  | ||||||
|         // Automatically fetch models for currently selected providers |         // Automatically fetch models for currently selected providers | ||||||
|         const selectedAiProvider = this.$widget.find('.ai-selected-provider').val() as string; |         const selectedAiProvider = this.$widget.find('.ai-selected-provider').val() as string; | ||||||
|         const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|  |  | ||||||
|         if (selectedAiProvider) { |         if (selectedAiProvider) { | ||||||
|             await this.fetchModelsForProvider(selectedAiProvider, 'chat'); |             await this.fetchModelsForProvider(selectedAiProvider, 'chat'); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (selectedEmbeddingProvider) { |  | ||||||
|             await this.fetchModelsForProvider(selectedEmbeddingProvider, 'embedding'); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Display validation warnings |         // Display validation warnings | ||||||
|         this.displayValidationWarnings(); |         this.displayValidationWarnings(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     cleanup() { |     cleanup() { | ||||||
|         // Clear intervals |         // Cleanup method for widget | ||||||
|         if (this.statsRefreshInterval) { |  | ||||||
|             clearInterval(this.statsRefreshInterval); |  | ||||||
|             this.statsRefreshInterval = null; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (this.indexRebuildRefreshInterval) { |  | ||||||
|             clearInterval(this.indexRebuildRefreshInterval); |  | ||||||
|             this.indexRebuildRefreshInterval = null; |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -11,34 +11,6 @@ export interface OllamaModelResponse { | |||||||
|     }>; |     }>; | ||||||
| } | } | ||||||
|  |  | ||||||
| // Interface for embedding statistics |  | ||||||
| export interface EmbeddingStats { |  | ||||||
|     success: boolean; |  | ||||||
|     stats: { |  | ||||||
|         totalNotesCount: number; |  | ||||||
|         embeddedNotesCount: number; |  | ||||||
|         queuedNotesCount: number; |  | ||||||
|         failedNotesCount: number; |  | ||||||
|         lastProcessedDate: string | null; |  | ||||||
|         percentComplete: number; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Interface for failed embedding notes |  | ||||||
| export interface FailedEmbeddingNotes { |  | ||||||
|     success: boolean; |  | ||||||
|     failedNotes: Array<{ |  | ||||||
|         noteId: string; |  | ||||||
|         title?: string; |  | ||||||
|         operation: string; |  | ||||||
|         attempts: number; |  | ||||||
|         lastAttempt: string; |  | ||||||
|         error: string; |  | ||||||
|         failureType: string; |  | ||||||
|         chunks: number; |  | ||||||
|         isPermanent: boolean; |  | ||||||
|     }>; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export interface OpenAIModelResponse { | export interface OpenAIModelResponse { | ||||||
|     success: boolean; |     success: boolean; | ||||||
| @@ -47,11 +19,6 @@ export interface OpenAIModelResponse { | |||||||
|         name: string; |         name: string; | ||||||
|         type: string; |         type: string; | ||||||
|     }>; |     }>; | ||||||
|     embeddingModels: Array<{ |  | ||||||
|         id: string; |  | ||||||
|         name: string; |  | ||||||
|         type: string; |  | ||||||
|     }>; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| export interface AnthropicModelResponse { | export interface AnthropicModelResponse { | ||||||
| @@ -61,9 +28,4 @@ export interface AnthropicModelResponse { | |||||||
|         name: string; |         name: string; | ||||||
|         type: string; |         type: string; | ||||||
|     }>; |     }>; | ||||||
|     embeddingModels: Array<{ |  | ||||||
|         id: string; |  | ||||||
|         name: string; |  | ||||||
|         type: string; |  | ||||||
|     }>; |  | ||||||
| } | } | ||||||
| @@ -6,21 +6,7 @@ import type { OpenAIModelResponse, AnthropicModelResponse, OllamaModelResponse } | |||||||
|  |  | ||||||
| export class ProviderService { | export class ProviderService { | ||||||
|     constructor(private $widget: JQuery<HTMLElement>) { |     constructor(private $widget: JQuery<HTMLElement>) { | ||||||
|         // Initialize Voyage models (since they don't have a dynamic refresh yet) |         // Embedding functionality removed | ||||||
|         this.initializeVoyageModels(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Initialize Voyage models with default values and ensure proper selection |  | ||||||
|      */ |  | ||||||
|     private initializeVoyageModels() { |  | ||||||
|         setTimeout(() => { |  | ||||||
|             const $voyageModelSelect = this.$widget.find('.voyage-embedding-model'); |  | ||||||
|             if ($voyageModelSelect.length > 0) { |  | ||||||
|                 const currentValue = $voyageModelSelect.val(); |  | ||||||
|                 this.ensureSelectedValue($voyageModelSelect, currentValue, 'voyageEmbeddingModel'); |  | ||||||
|             } |  | ||||||
|         }, 100); // Small delay to ensure the widget is fully initialized |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
| @@ -95,29 +81,10 @@ export class ProviderService { | |||||||
|                     this.ensureSelectedValue($chatModelSelect, currentChatValue, 'openaiDefaultModel'); |                     this.ensureSelectedValue($chatModelSelect, currentChatValue, 'openaiDefaultModel'); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 // Update the embedding models dropdown |  | ||||||
|                 if (response.embeddingModels?.length > 0) { |  | ||||||
|                     const $embedModelSelect = this.$widget.find('.openai-embedding-model'); |  | ||||||
|                     const currentEmbedValue = $embedModelSelect.val(); |  | ||||||
|  |  | ||||||
|                     // Clear existing options |  | ||||||
|                     $embedModelSelect.empty(); |  | ||||||
|  |  | ||||||
|                     // Sort models by name |  | ||||||
|                     const sortedEmbedModels = [...response.embeddingModels].sort((a, b) => a.name.localeCompare(b.name)); |  | ||||||
|  |  | ||||||
|                     // Add models to the dropdown |  | ||||||
|                     sortedEmbedModels.forEach(model => { |  | ||||||
|                         $embedModelSelect.append(`<option value="${model.id}">${model.name}</option>`); |  | ||||||
|                     }); |  | ||||||
|  |  | ||||||
|                     // Try to restore the previously selected value |  | ||||||
|                     this.ensureSelectedValue($embedModelSelect, currentEmbedValue, 'openaiEmbeddingModel'); |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 if (showLoading) { |                 if (showLoading) { | ||||||
|                     // Show success message |                     // Show success message | ||||||
|                     const totalModels = (response.chatModels?.length || 0) + (response.embeddingModels?.length || 0); |                     const totalModels = (response.chatModels?.length || 0); | ||||||
|                     toastService.showMessage(`${totalModels} OpenAI models found.`); |                     toastService.showMessage(`${totalModels} OpenAI models found.`); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
| @@ -187,14 +154,9 @@ export class ProviderService { | |||||||
|                     this.ensureSelectedValue($chatModelSelect, currentChatValue, 'anthropicDefaultModel'); |                     this.ensureSelectedValue($chatModelSelect, currentChatValue, 'anthropicDefaultModel'); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 // Handle embedding models if they exist |  | ||||||
|                 if (response.embeddingModels?.length > 0 && showLoading) { |  | ||||||
|                     toastService.showMessage(`Found ${response.embeddingModels.length} Anthropic embedding models.`); |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 if (showLoading) { |                 if (showLoading) { | ||||||
|                     // Show success message |                     // Show success message | ||||||
|                     const totalModels = (response.chatModels?.length || 0) + (response.embeddingModels?.length || 0); |                     const totalModels = (response.chatModels?.length || 0); | ||||||
|                     toastService.showMessage(`${totalModels} Anthropic models found.`); |                     toastService.showMessage(`${totalModels} Anthropic models found.`); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
| @@ -240,66 +202,13 @@ export class ProviderService { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         try { |         try { | ||||||
|             // Determine which URL to use based on the current context |             // Use the general Ollama base URL | ||||||
|             // If we're in the embedding provider context, use the embedding base URL |             const ollamaBaseUrl = this.$widget.find('.ollama-base-url').val() as string; | ||||||
|             // Otherwise, use the general base URL |  | ||||||
|             const selectedAiProvider = this.$widget.find('.ai-selected-provider').val() as string; |  | ||||||
|             const selectedEmbeddingProvider = this.$widget.find('.embedding-selected-provider').val() as string; |  | ||||||
|              |  | ||||||
|             let ollamaBaseUrl: string; |  | ||||||
|              |  | ||||||
|             // If embedding provider is Ollama and it's visible, use embedding URL |  | ||||||
|             const $embeddingOllamaSettings = this.$widget.find('.ollama-embedding-provider-settings'); |  | ||||||
|             if (selectedEmbeddingProvider === 'ollama' && $embeddingOllamaSettings.is(':visible')) { |  | ||||||
|                 ollamaBaseUrl = this.$widget.find('.ollama-embedding-base-url').val() as string; |  | ||||||
|             } else { |  | ||||||
|                 ollamaBaseUrl = this.$widget.find('.ollama-base-url').val() as string; |  | ||||||
|             } |  | ||||||
|              |              | ||||||
|             const response = await server.get<OllamaModelResponse>(`llm/providers/ollama/models?baseUrl=${encodeURIComponent(ollamaBaseUrl)}`); |             const response = await server.get<OllamaModelResponse>(`llm/providers/ollama/models?baseUrl=${encodeURIComponent(ollamaBaseUrl)}`); | ||||||
|  |  | ||||||
|             if (response && response.success && response.models && response.models.length > 0) { |             if (response && response.success && response.models && response.models.length > 0) { | ||||||
|                 // Update both embedding model dropdowns |                 // Update the LLM model dropdown | ||||||
|                 const $embedModelSelect = this.$widget.find('.ollama-embedding-model'); |  | ||||||
|                 const $chatEmbedModelSelect = this.$widget.find('.ollama-chat-embedding-model'); |  | ||||||
|                  |  | ||||||
|                 const currentValue = $embedModelSelect.val(); |  | ||||||
|                 const currentChatEmbedValue = $chatEmbedModelSelect.val(); |  | ||||||
|  |  | ||||||
|                 // Prepare embedding models |  | ||||||
|                 const embeddingModels = response.models.filter(model => |  | ||||||
|                     model.name.includes('embed') || model.name.includes('bert')); |  | ||||||
|                  |  | ||||||
|                 const generalModels = response.models.filter(model => |  | ||||||
|                     !model.name.includes('embed') && !model.name.includes('bert')); |  | ||||||
|  |  | ||||||
|                 // Update .ollama-embedding-model dropdown (embedding provider settings) |  | ||||||
|                 $embedModelSelect.empty(); |  | ||||||
|                 embeddingModels.forEach(model => { |  | ||||||
|                     $embedModelSelect.append(`<option value="${model.name}">${model.name}</option>`); |  | ||||||
|                 }); |  | ||||||
|                 if (embeddingModels.length > 0) { |  | ||||||
|                     $embedModelSelect.append(`<option disabled>─────────────</option>`); |  | ||||||
|                 } |  | ||||||
|                 generalModels.forEach(model => { |  | ||||||
|                     $embedModelSelect.append(`<option value="${model.name}">${model.name}</option>`); |  | ||||||
|                 }); |  | ||||||
|                 this.ensureSelectedValue($embedModelSelect, currentValue, 'ollamaEmbeddingModel'); |  | ||||||
|  |  | ||||||
|                 // Update .ollama-chat-embedding-model dropdown (general Ollama provider settings) |  | ||||||
|                 $chatEmbedModelSelect.empty(); |  | ||||||
|                 embeddingModels.forEach(model => { |  | ||||||
|                     $chatEmbedModelSelect.append(`<option value="${model.name}">${model.name}</option>`); |  | ||||||
|                 }); |  | ||||||
|                 if (embeddingModels.length > 0) { |  | ||||||
|                     $chatEmbedModelSelect.append(`<option disabled>─────────────</option>`); |  | ||||||
|                 } |  | ||||||
|                 generalModels.forEach(model => { |  | ||||||
|                     $chatEmbedModelSelect.append(`<option value="${model.name}">${model.name}</option>`); |  | ||||||
|                 }); |  | ||||||
|                 this.ensureSelectedValue($chatEmbedModelSelect, currentChatEmbedValue, 'ollamaEmbeddingModel'); |  | ||||||
|  |  | ||||||
|                 // Also update the LLM model dropdown |  | ||||||
|                 const $modelSelect = this.$widget.find('.ollama-default-model'); |                 const $modelSelect = this.$widget.find('.ollama-default-model'); | ||||||
|                 const currentModelValue = $modelSelect.val(); |                 const currentModelValue = $modelSelect.val(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,46 +16,7 @@ export const TPL = ` | |||||||
|     </div> |     </div> | ||||||
| </div> | </div> | ||||||
|  |  | ||||||
| <div class="options-section"> | <!-- Embedding statistics section removed --> | ||||||
|     <h4>${t("ai_llm.embedding_statistics")}</h4> |  | ||||||
|     <div class="embedding-stats-container"> |  | ||||||
|         <div class="embedding-stats"> |  | ||||||
|             <div class="row"> |  | ||||||
|                 <div class="col-md-6"> |  | ||||||
|                     <div><strong>${t("ai_llm.processed_notes")}:</strong> <span class="embedding-processed-notes">-</span></div> |  | ||||||
|                     <div><strong>${t("ai_llm.total_notes")}:</strong> <span class="embedding-total-notes">-</span></div> |  | ||||||
|                     <div><strong>${t("ai_llm.progress")}:</strong> <span class="embedding-status-text">-</span></div> |  | ||||||
|                 </div> |  | ||||||
|  |  | ||||||
|                 <div class="col-md-6"> |  | ||||||
|                     <div><strong>${t("ai_llm.queued_notes")}:</strong> <span class="embedding-queued-notes">-</span></div> |  | ||||||
|                     <div><strong>${t("ai_llm.failed_notes")}:</strong> <span class="embedding-failed-notes">-</span></div> |  | ||||||
|                     <div><strong>${t("ai_llm.last_processed")}:</strong> <span class="embedding-last-processed">-</span></div> |  | ||||||
|                 </div> |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|         <div class="progress mt-1" style="height: 10px;"> |  | ||||||
|             <div class="progress-bar embedding-progress" role="progressbar" style="width: 0%;" |  | ||||||
|                 aria-valuenow="0" aria-valuemin="0" aria-valuemax="100">0%</div> |  | ||||||
|         </div> |  | ||||||
|         <div class="mt-2"> |  | ||||||
|             <button class="btn btn-sm btn-outline-secondary embedding-refresh-stats"> |  | ||||||
|                 ${t("ai_llm.refresh_stats")} |  | ||||||
|             </button> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <hr/> |  | ||||||
|     <!-- Failed embeddings section --> |  | ||||||
|     <h5>${t("ai_llm.failed_notes")}</h4> |  | ||||||
|     <div class="form-group mt-4"> |  | ||||||
|         <div class="embedding-failed-notes-container"> |  | ||||||
|             <div class="embedding-failed-notes-list"> |  | ||||||
|                 <div class="alert alert-info">${t("ai_llm.no_failed_embeddings")}</div> |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
| </div> |  | ||||||
|  |  | ||||||
| <div class="ai-providers-section options-section"> | <div class="ai-providers-section options-section"> | ||||||
|     <h4>${t("ai_llm.provider_configuration")}</h4> |     <h4>${t("ai_llm.provider_configuration")}</h4> | ||||||
| @@ -171,188 +132,4 @@ export const TPL = ` | |||||||
|     </div> |     </div> | ||||||
| </div> | </div> | ||||||
|  |  | ||||||
|  | `; | ||||||
| <div class="options-section"> |  | ||||||
|     <h4>${t("ai_llm.embeddings_configuration")}</h4> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label class="embedding-provider-label">${t("ai_llm.selected_embedding_provider")}</label> |  | ||||||
|         <select class="embedding-selected-provider form-control"> |  | ||||||
|             <option value="">${t("ai_llm.select_embedding_provider")}</option> |  | ||||||
|             <option value="openai">OpenAI</option> |  | ||||||
|             <option value="voyage">Voyage AI</option> |  | ||||||
|             <option value="ollama">Ollama</option> |  | ||||||
|             <option value="local">Local</option> |  | ||||||
|         </select> |  | ||||||
|         <div class="form-text">${t("ai_llm.selected_embedding_provider_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- OpenAI Embedding Provider Settings --> |  | ||||||
|     <div class="embedding-provider-settings openai-embedding-provider-settings" style="display: none;"> |  | ||||||
|         <div class="card mt-3"> |  | ||||||
|             <div class="card-header"> |  | ||||||
|                 <h5>${t("ai_llm.openai_embedding_settings")}</h5> |  | ||||||
|             </div> |  | ||||||
|             <div class="card-body"> |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.api_key")}</label> |  | ||||||
|                     <input type="password" class="openai-embedding-api-key form-control" autocomplete="off" /> |  | ||||||
|                     <div class="form-text">${t("ai_llm.openai_embedding_api_key_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|  |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.url")}</label> |  | ||||||
|                     <input type="text" class="openai-embedding-base-url form-control" /> |  | ||||||
|                     <div class="form-text">${t("ai_llm.openai_embedding_url_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|  |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.embedding_model")}</label> |  | ||||||
|                     <select class="openai-embedding-model form-control"> |  | ||||||
|                         <option value="">${t("ai_llm.select_model")}</option> |  | ||||||
|                     </select> |  | ||||||
|                     <div class="form-text">${t("ai_llm.openai_embedding_model_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- Voyage Embedding Provider Settings --> |  | ||||||
|     <div class="embedding-provider-settings voyage-embedding-provider-settings" style="display: none;"> |  | ||||||
|         <div class="card mt-3"> |  | ||||||
|             <div class="card-header"> |  | ||||||
|                 <h5>${t("ai_llm.voyage_settings")}</h5> |  | ||||||
|             </div> |  | ||||||
|             <div class="card-body"> |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.api_key")}</label> |  | ||||||
|                     <input type="password" class="voyage-api-key form-control" autocomplete="off" /> |  | ||||||
|                     <div class="form-text">${t("ai_llm.voyage_api_key_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|  |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.url")}</label> |  | ||||||
|                     <input type="text" class="voyage-embedding-base-url form-control" /> |  | ||||||
|                     <div class="form-text">${t("ai_llm.voyage_embedding_url_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|  |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.embedding_model")}</label> |  | ||||||
|                     <select class="voyage-embedding-model form-control"> |  | ||||||
|                         <option value="">${t("ai_llm.select_model")}</option> |  | ||||||
|                     </select> |  | ||||||
|                     <div class="form-text">${t("ai_llm.voyage_embedding_model_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- Ollama Embedding Provider Settings --> |  | ||||||
|     <div class="embedding-provider-settings ollama-embedding-provider-settings" style="display: none;"> |  | ||||||
|         <div class="card mt-3"> |  | ||||||
|             <div class="card-header"> |  | ||||||
|                 <h5>${t("ai_llm.ollama_embedding_settings")}</h5> |  | ||||||
|             </div> |  | ||||||
|             <div class="card-body"> |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.url")}</label> |  | ||||||
|                     <input type="text" class="ollama-embedding-base-url form-control" /> |  | ||||||
|                     <div class="form-text">${t("ai_llm.ollama_embedding_url_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|  |  | ||||||
|                 <div class="form-group"> |  | ||||||
|                     <label>${t("ai_llm.embedding_model")}</label> |  | ||||||
|                     <select class="ollama-embedding-model form-control"> |  | ||||||
|                         <option value="">${t("ai_llm.select_model")}</option> |  | ||||||
|                     </select> |  | ||||||
|                     <div class="form-text">${t("ai_llm.ollama_embedding_model_description")}</div> |  | ||||||
|                 </div> |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- Local Embedding Provider Settings --> |  | ||||||
|     <div class="embedding-provider-settings local-embedding-provider-settings" style="display: none;"> |  | ||||||
|         <div class="card mt-3"> |  | ||||||
|             <div class="card-header"> |  | ||||||
|                 <h5>${t("ai_llm.local_embedding_settings")}</h5> |  | ||||||
|             </div> |  | ||||||
|             <div class="card-body"> |  | ||||||
|                 <div class="form-text">${t("ai_llm.local_embedding_description")}</div> |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label>${t("ai_llm.embedding_dimension_strategy")}</label> |  | ||||||
|         <select class="embedding-dimension-strategy form-control"> |  | ||||||
|             <option value="auto">${t("ai_llm.embedding_dimension_auto")}</option> |  | ||||||
|             <option value="fixed-768">${t("ai_llm.embedding_dimension_fixed")} (768)</option> |  | ||||||
|             <option value="fixed-1024">${t("ai_llm.embedding_dimension_fixed")} (1024)</option> |  | ||||||
|             <option value="fixed-1536">${t("ai_llm.embedding_dimension_fixed")} (1536)</option> |  | ||||||
|         </select> |  | ||||||
|         <div class="form-text">${t("ai_llm.embedding_dimension_strategy_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label>${t("ai_llm.embedding_similarity_threshold")}</label> |  | ||||||
|         <input class="embedding-similarity-threshold form-control" type="number" min="0" max="1" step="0.01"> |  | ||||||
|         <div class="form-text">${t("ai_llm.embedding_similarity_threshold_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label>${t("ai_llm.embedding_batch_size")}</label> |  | ||||||
|         <input class="embedding-batch-size form-control" type="number" min="1" max="100" step="1"> |  | ||||||
|         <div class="form-text">${t("ai_llm.embedding_batch_size_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label>${t("ai_llm.embedding_update_interval")}</label> |  | ||||||
|         <input class="embedding-update-interval form-control" type="number" min="100" max="60000" step="100"> |  | ||||||
|         <div class="form-text">${t("ai_llm.embedding_update_interval_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label>${t("ai_llm.max_notes_per_llm_query")}</label> |  | ||||||
|         <input class="max-notes-per-llm-query form-control" type="number" min="1" max="20" step="1"> |  | ||||||
|         <div class="form-text">${t("ai_llm.max_notes_per_llm_query_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group"> |  | ||||||
|         <label class="tn-checkbox"> |  | ||||||
|             <input class="enable-automatic-indexing form-check-input" type="checkbox"> |  | ||||||
|             ${t("ai_llm.enable_automatic_indexing")} |  | ||||||
|         </label> |  | ||||||
|         <div class="form-text">${t("ai_llm.enable_automatic_indexing_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="form-group mt-3"> |  | ||||||
|         <label class="tn-checkbox"> |  | ||||||
|             <input class="embedding-auto-update-enabled form-check-input" type="checkbox"> |  | ||||||
|             ${t("ai_llm.embedding_auto_update_enabled")} |  | ||||||
|         </label> |  | ||||||
|         <div class="form-text">${t("ai_llm.embedding_auto_update_enabled_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- Recreate embeddings button --> |  | ||||||
|     <div class="form-group mt-3"> |  | ||||||
|         <button class="btn btn-outline-primary recreate-embeddings"> |  | ||||||
|             ${t("ai_llm.recreate_embeddings")} |  | ||||||
|         </button> |  | ||||||
|         <div class="form-text">${t("ai_llm.recreate_embeddings_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- Rebuild index button --> |  | ||||||
|     <div class="form-group mt-3"> |  | ||||||
|         <button class="btn btn-outline-primary rebuild-embeddings-index"> |  | ||||||
|             ${t("ai_llm.rebuild_index")} |  | ||||||
|         </button> |  | ||||||
|         <div class="form-text">${t("ai_llm.rebuild_index_description")}</div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <!-- Note about embedding provider precedence --> |  | ||||||
|     <div class="form-group mt-3"> |  | ||||||
|         <h5>${t("ai_llm.embedding_providers_order")}</h5> |  | ||||||
|         <div class="form-text mt-2">${t("ai_llm.embedding_providers_order_description")}</div> |  | ||||||
|     </div> |  | ||||||
| </div>`; |  | ||||||
|   | |||||||
| @@ -30,17 +30,8 @@ export default async function buildApp() { | |||||||
|     // Listen for database initialization event |     // Listen for database initialization event | ||||||
|     eventService.subscribe(eventService.DB_INITIALIZED, async () => { |     eventService.subscribe(eventService.DB_INITIALIZED, async () => { | ||||||
|         try { |         try { | ||||||
|             log.info("Database initialized, setting up LLM features"); |             log.info("Database initialized, LLM features available"); | ||||||
|  |             log.info("LLM features ready"); | ||||||
|             // Initialize embedding providers |  | ||||||
|             const { initializeEmbeddings } = await import("./services/llm/embeddings/init.js"); |  | ||||||
|             await initializeEmbeddings(); |  | ||||||
|  |  | ||||||
|             // Initialize the index service for LLM functionality |  | ||||||
|             const { default: indexService } = await import("./services/llm/index_service.js"); |  | ||||||
|             await indexService.initialize().catch(e => console.error("Failed to initialize index service:", e)); |  | ||||||
|  |  | ||||||
|             log.info("LLM features initialized successfully"); |  | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             console.error("Error initializing LLM features:", error); |             console.error("Error initializing LLM features:", error); | ||||||
|         } |         } | ||||||
| @@ -49,13 +40,7 @@ export default async function buildApp() { | |||||||
|     // Initialize LLM features only if database is already initialized |     // Initialize LLM features only if database is already initialized | ||||||
|     if (sql_init.isDbInitialized()) { |     if (sql_init.isDbInitialized()) { | ||||||
|         try { |         try { | ||||||
|             // Initialize embedding providers |             log.info("LLM features ready"); | ||||||
|             const { initializeEmbeddings } = await import("./services/llm/embeddings/init.js"); |  | ||||||
|             await initializeEmbeddings(); |  | ||||||
|  |  | ||||||
|             // Initialize the index service for LLM functionality |  | ||||||
|             const { default: indexService } = await import("./services/llm/index_service.js"); |  | ||||||
|             await indexService.initialize().catch(e => console.error("Failed to initialize index service:", e)); |  | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             console.error("Error initializing LLM features:", error); |             console.error("Error initializing LLM features:", error); | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -146,47 +146,6 @@ CREATE INDEX IDX_notes_blobId on notes (blobId); | |||||||
| CREATE INDEX IDX_revisions_blobId on revisions (blobId); | CREATE INDEX IDX_revisions_blobId on revisions (blobId); | ||||||
| CREATE INDEX IDX_attachments_blobId on attachments (blobId); | CREATE INDEX IDX_attachments_blobId on attachments (blobId); | ||||||
|  |  | ||||||
| CREATE TABLE IF NOT EXISTS "note_embeddings" ( |  | ||||||
|     "embedId" TEXT NOT NULL PRIMARY KEY, |  | ||||||
|     "noteId" TEXT NOT NULL, |  | ||||||
|     "providerId" TEXT NOT NULL, |  | ||||||
|     "modelId" TEXT NOT NULL, |  | ||||||
|     "dimension" INTEGER NOT NULL, |  | ||||||
|     "embedding" BLOB NOT NULL, |  | ||||||
|     "version" INTEGER NOT NULL DEFAULT 1, |  | ||||||
|     "dateCreated" TEXT NOT NULL, |  | ||||||
|     "utcDateCreated" TEXT NOT NULL, |  | ||||||
|     "dateModified" TEXT NOT NULL, |  | ||||||
|     "utcDateModified" TEXT NOT NULL |  | ||||||
| ); |  | ||||||
|  |  | ||||||
| CREATE INDEX "IDX_note_embeddings_noteId" ON "note_embeddings" ("noteId"); |  | ||||||
| CREATE INDEX "IDX_note_embeddings_providerId_modelId" ON "note_embeddings" ("providerId", "modelId"); |  | ||||||
|  |  | ||||||
| CREATE TABLE IF NOT EXISTS "embedding_queue" ( |  | ||||||
|     "noteId" TEXT NOT NULL PRIMARY KEY, |  | ||||||
|     "operation" TEXT NOT NULL, |  | ||||||
|     "dateQueued" TEXT NOT NULL, |  | ||||||
|     "utcDateQueued" TEXT NOT NULL, |  | ||||||
|     "priority" INTEGER NOT NULL DEFAULT 0, |  | ||||||
|     "attempts" INTEGER NOT NULL DEFAULT 0, |  | ||||||
|     "lastAttempt" TEXT NULL, |  | ||||||
|     "error" TEXT NULL, |  | ||||||
|     "failed" INTEGER NOT NULL DEFAULT 0, |  | ||||||
|     "isProcessing" INTEGER NOT NULL DEFAULT 0 |  | ||||||
| ); |  | ||||||
|  |  | ||||||
| CREATE TABLE IF NOT EXISTS "embedding_providers" ( |  | ||||||
|     "providerId" TEXT NOT NULL PRIMARY KEY, |  | ||||||
|     "name" TEXT NOT NULL, |  | ||||||
|     "isEnabled" INTEGER NOT NULL DEFAULT 0, |  | ||||||
|     "priority" INTEGER NOT NULL DEFAULT 0, |  | ||||||
|     "config" TEXT NOT NULL, |  | ||||||
|     "dateCreated" TEXT NOT NULL, |  | ||||||
|     "utcDateCreated" TEXT NOT NULL, |  | ||||||
|     "dateModified" TEXT NOT NULL, |  | ||||||
|     "utcDateModified" TEXT NOT NULL |  | ||||||
| ); |  | ||||||
|  |  | ||||||
| CREATE TABLE IF NOT EXISTS sessions ( | CREATE TABLE IF NOT EXISTS sessions ( | ||||||
|     id TEXT PRIMARY KEY, |     id TEXT PRIMARY KEY, | ||||||
|   | |||||||
| @@ -12,7 +12,6 @@ import type { AttachmentRow, BlobRow, RevisionRow } from "@triliumnext/commons"; | |||||||
| import BBlob from "./entities/bblob.js"; | import BBlob from "./entities/bblob.js"; | ||||||
| import BRecentNote from "./entities/brecent_note.js"; | import BRecentNote from "./entities/brecent_note.js"; | ||||||
| import type AbstractBeccaEntity from "./entities/abstract_becca_entity.js"; | import type AbstractBeccaEntity from "./entities/abstract_becca_entity.js"; | ||||||
| import type BNoteEmbedding from "./entities/bnote_embedding.js"; |  | ||||||
|  |  | ||||||
| interface AttachmentOpts { | interface AttachmentOpts { | ||||||
|     includeContentLength?: boolean; |     includeContentLength?: boolean; | ||||||
| @@ -33,7 +32,6 @@ export default class Becca { | |||||||
|     attributeIndex!: Record<string, BAttribute[]>; |     attributeIndex!: Record<string, BAttribute[]>; | ||||||
|     options!: Record<string, BOption>; |     options!: Record<string, BOption>; | ||||||
|     etapiTokens!: Record<string, BEtapiToken>; |     etapiTokens!: Record<string, BEtapiToken>; | ||||||
|     noteEmbeddings!: Record<string, BNoteEmbedding>; |  | ||||||
|  |  | ||||||
|     allNoteSetCache: NoteSet | null; |     allNoteSetCache: NoteSet | null; | ||||||
|  |  | ||||||
| @@ -50,7 +48,6 @@ export default class Becca { | |||||||
|         this.attributeIndex = {}; |         this.attributeIndex = {}; | ||||||
|         this.options = {}; |         this.options = {}; | ||||||
|         this.etapiTokens = {}; |         this.etapiTokens = {}; | ||||||
|         this.noteEmbeddings = {}; |  | ||||||
|  |  | ||||||
|         this.dirtyNoteSetCache(); |         this.dirtyNoteSetCache(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -9,10 +9,9 @@ import BBranch from "./entities/bbranch.js"; | |||||||
| import BAttribute from "./entities/battribute.js"; | import BAttribute from "./entities/battribute.js"; | ||||||
| import BOption from "./entities/boption.js"; | import BOption from "./entities/boption.js"; | ||||||
| import BEtapiToken from "./entities/betapi_token.js"; | import BEtapiToken from "./entities/betapi_token.js"; | ||||||
| import BNoteEmbedding from "./entities/bnote_embedding.js"; |  | ||||||
| import cls from "../services/cls.js"; | import cls from "../services/cls.js"; | ||||||
| import entityConstructor from "../becca/entity_constructor.js"; | import entityConstructor from "../becca/entity_constructor.js"; | ||||||
| import type { AttributeRow, BranchRow, EtapiTokenRow, NoteRow, OptionRow, NoteEmbeddingRow } from "@triliumnext/commons"; | import type { AttributeRow, BranchRow, EtapiTokenRow, NoteRow, OptionRow } from "@triliumnext/commons"; | ||||||
| import type AbstractBeccaEntity from "./entities/abstract_becca_entity.js"; | import type AbstractBeccaEntity from "./entities/abstract_becca_entity.js"; | ||||||
| import ws from "../services/ws.js"; | import ws from "../services/ws.js"; | ||||||
|  |  | ||||||
| @@ -65,17 +64,6 @@ function load() { | |||||||
|             new BEtapiToken(row); |             new BEtapiToken(row); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             for (const row of sql.getRows<NoteEmbeddingRow>(/*sql*/`SELECT embedId, noteId, providerId, modelId, dimension, embedding, version, dateCreated, dateModified, utcDateCreated, utcDateModified FROM note_embeddings`)) { |  | ||||||
|                 new BNoteEmbedding(row).init(); |  | ||||||
|             } |  | ||||||
|         } catch (e: unknown) { |  | ||||||
|             if (e && typeof e === "object" && "message" in e && typeof e.message === "string" && e.message.includes("no such table")) { |  | ||||||
|                 // Can be ignored. |  | ||||||
|             } else { |  | ||||||
|                 throw e; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|     for (const noteId in becca.notes) { |     for (const noteId in becca.notes) { | ||||||
| @@ -98,7 +86,7 @@ eventService.subscribeBeccaLoader([eventService.ENTITY_CHANGE_SYNCED], ({ entity | |||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (["notes", "branches", "attributes", "etapi_tokens", "options", "note_embeddings"].includes(entityName)) { |     if (["notes", "branches", "attributes", "etapi_tokens", "options"].includes(entityName)) { | ||||||
|         const EntityClass = entityConstructor.getEntityFromEntityName(entityName); |         const EntityClass = entityConstructor.getEntityFromEntityName(entityName); | ||||||
|         const primaryKeyName = EntityClass.primaryKeyName; |         const primaryKeyName = EntityClass.primaryKeyName; | ||||||
|  |  | ||||||
| @@ -156,8 +144,6 @@ eventService.subscribeBeccaLoader([eventService.ENTITY_DELETED, eventService.ENT | |||||||
|         attributeDeleted(entityId); |         attributeDeleted(entityId); | ||||||
|     } else if (entityName === "etapi_tokens") { |     } else if (entityName === "etapi_tokens") { | ||||||
|         etapiTokenDeleted(entityId); |         etapiTokenDeleted(entityId); | ||||||
|     } else if (entityName === "note_embeddings") { |  | ||||||
|         noteEmbeddingDeleted(entityId); |  | ||||||
|     } |     } | ||||||
| }); | }); | ||||||
|  |  | ||||||
| @@ -293,9 +279,6 @@ function etapiTokenDeleted(etapiTokenId: string) { | |||||||
|     delete becca.etapiTokens[etapiTokenId]; |     delete becca.etapiTokens[etapiTokenId]; | ||||||
| } | } | ||||||
|  |  | ||||||
| function noteEmbeddingDeleted(embedId: string) { |  | ||||||
|     delete becca.noteEmbeddings[embedId]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| eventService.subscribeBeccaLoader(eventService.ENTER_PROTECTED_SESSION, () => { | eventService.subscribeBeccaLoader(eventService.ENTER_PROTECTED_SESSION, () => { | ||||||
|     try { |     try { | ||||||
|   | |||||||
| @@ -1,83 +0,0 @@ | |||||||
| import AbstractBeccaEntity from "./abstract_becca_entity.js"; |  | ||||||
| import dateUtils from "../../services/date_utils.js"; |  | ||||||
| import type { NoteEmbeddingRow } from "@triliumnext/commons"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Entity representing a note's vector embedding for semantic search and AI features |  | ||||||
|  */ |  | ||||||
| class BNoteEmbedding extends AbstractBeccaEntity<BNoteEmbedding> { |  | ||||||
|     static get entityName() { |  | ||||||
|         return "note_embeddings"; |  | ||||||
|     } |  | ||||||
|     static get primaryKeyName() { |  | ||||||
|         return "embedId"; |  | ||||||
|     } |  | ||||||
|     static get hashedProperties() { |  | ||||||
|         return ["embedId", "noteId", "providerId", "modelId", "dimension", "version"]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     embedId!: string; |  | ||||||
|     noteId!: string; |  | ||||||
|     providerId!: string; |  | ||||||
|     modelId!: string; |  | ||||||
|     dimension!: number; |  | ||||||
|     embedding!: Buffer; |  | ||||||
|     version!: number; |  | ||||||
|  |  | ||||||
|     constructor(row?: NoteEmbeddingRow) { |  | ||||||
|         super(); |  | ||||||
|  |  | ||||||
|         if (row) { |  | ||||||
|             this.updateFromRow(row); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     init() { |  | ||||||
|         if (this.embedId) { |  | ||||||
|             this.becca.noteEmbeddings[this.embedId] = this; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     updateFromRow(row: NoteEmbeddingRow): void { |  | ||||||
|         this.embedId = row.embedId; |  | ||||||
|         this.noteId = row.noteId; |  | ||||||
|         this.providerId = row.providerId; |  | ||||||
|         this.modelId = row.modelId; |  | ||||||
|         this.dimension = row.dimension; |  | ||||||
|         this.embedding = row.embedding; |  | ||||||
|         this.version = row.version; |  | ||||||
|         this.dateCreated = row.dateCreated; |  | ||||||
|         this.dateModified = row.dateModified; |  | ||||||
|         this.utcDateCreated = row.utcDateCreated; |  | ||||||
|         this.utcDateModified = row.utcDateModified; |  | ||||||
|  |  | ||||||
|         if (this.embedId) { |  | ||||||
|             this.becca.noteEmbeddings[this.embedId] = this; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     override beforeSaving() { |  | ||||||
|         super.beforeSaving(); |  | ||||||
|  |  | ||||||
|         this.dateModified = dateUtils.localNowDateTime(); |  | ||||||
|         this.utcDateModified = dateUtils.utcNowDateTime(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     getPojo(): NoteEmbeddingRow { |  | ||||||
|         return { |  | ||||||
|             embedId: this.embedId, |  | ||||||
|             noteId: this.noteId, |  | ||||||
|             providerId: this.providerId, |  | ||||||
|             modelId: this.modelId, |  | ||||||
|             dimension: this.dimension, |  | ||||||
|             embedding: this.embedding, |  | ||||||
|             version: this.version, |  | ||||||
|             dateCreated: this.dateCreated!, |  | ||||||
|             dateModified: this.dateModified!, |  | ||||||
|             utcDateCreated: this.utcDateCreated, |  | ||||||
|             utcDateModified: this.utcDateModified! |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export default BNoteEmbedding; |  | ||||||
| @@ -6,7 +6,6 @@ import BBlob from "./entities/bblob.js"; | |||||||
| import BBranch from "./entities/bbranch.js"; | import BBranch from "./entities/bbranch.js"; | ||||||
| import BEtapiToken from "./entities/betapi_token.js"; | import BEtapiToken from "./entities/betapi_token.js"; | ||||||
| import BNote from "./entities/bnote.js"; | import BNote from "./entities/bnote.js"; | ||||||
| import BNoteEmbedding from "./entities/bnote_embedding.js"; |  | ||||||
| import BOption from "./entities/boption.js"; | import BOption from "./entities/boption.js"; | ||||||
| import BRecentNote from "./entities/brecent_note.js"; | import BRecentNote from "./entities/brecent_note.js"; | ||||||
| import BRevision from "./entities/brevision.js"; | import BRevision from "./entities/brevision.js"; | ||||||
| @@ -20,7 +19,6 @@ const ENTITY_NAME_TO_ENTITY: Record<string, ConstructorData<any> & EntityClass> | |||||||
|     branches: BBranch, |     branches: BBranch, | ||||||
|     etapi_tokens: BEtapiToken, |     etapi_tokens: BEtapiToken, | ||||||
|     notes: BNote, |     notes: BNote, | ||||||
|     note_embeddings: BNoteEmbedding, |  | ||||||
|     options: BOption, |     options: BOption, | ||||||
|     recent_notes: BRecentNote, |     recent_notes: BRecentNote, | ||||||
|     revisions: BRevision |     revisions: BRevision | ||||||
|   | |||||||
| @@ -6,6 +6,19 @@ | |||||||
|  |  | ||||||
| // Migrations should be kept in descending order, so the latest migration is first. | // Migrations should be kept in descending order, so the latest migration is first. | ||||||
| const MIGRATIONS: (SqlMigration | JsMigration)[] = [ | const MIGRATIONS: (SqlMigration | JsMigration)[] = [ | ||||||
|  |     // Remove embedding tables since LLM embedding functionality has been removed | ||||||
|  |     { | ||||||
|  |         version: 232, | ||||||
|  |         sql: /*sql*/` | ||||||
|  |             -- Remove LLM embedding tables and data | ||||||
|  |             DROP TABLE IF EXISTS "note_embeddings"; | ||||||
|  |             DROP TABLE IF EXISTS "embedding_queue"; | ||||||
|  |             DROP TABLE IF EXISTS "embedding_providers"; | ||||||
|  |  | ||||||
|  |             -- Remove embedding-related entity changes | ||||||
|  |             DELETE FROM entity_changes WHERE entityName IN ('note_embeddings', 'embedding_queue', 'embedding_providers'); | ||||||
|  |         ` | ||||||
|  |     }, | ||||||
|     // Session store |     // Session store | ||||||
|     { |     { | ||||||
|         version: 231, |         version: 231, | ||||||
|   | |||||||
| @@ -1,843 +0,0 @@ | |||||||
| import options from "../../services/options.js"; |  | ||||||
| import vectorStore from "../../services/llm/embeddings/index.js"; |  | ||||||
| import providerManager from "../../services/llm/providers/providers.js"; |  | ||||||
| import indexService from "../../services/llm/index_service.js"; |  | ||||||
| import becca from "../../becca/becca.js"; |  | ||||||
| import type { Request, Response } from "express"; |  | ||||||
| import log from "../../services/log.js"; |  | ||||||
| import sql from "../../services/sql.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/similar/{noteId}: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Find similar notes based on a given note ID |  | ||||||
|  *     operationId: embeddings-similar-by-note |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: noteId |  | ||||||
|  *         in: path |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *       - name: providerId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         default: openai |  | ||||||
|  *         description: Embedding provider ID |  | ||||||
|  *       - name: modelId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         default: text-embedding-3-small |  | ||||||
|  *         description: Embedding model ID |  | ||||||
|  *       - name: limit |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: integer |  | ||||||
|  *         default: 10 |  | ||||||
|  *         description: Maximum number of similar notes to return |  | ||||||
|  *       - name: threshold |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: number |  | ||||||
|  *           format: float |  | ||||||
|  *         default: 0.7 |  | ||||||
|  *         description: Similarity threshold (0.0-1.0) |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: List of similar notes |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 similarNotes: |  | ||||||
|  *                   type: array |  | ||||||
|  *                   items: |  | ||||||
|  *                     type: object |  | ||||||
|  *                     properties: |  | ||||||
|  *                       noteId: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       title: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       similarity: |  | ||||||
|  *                         type: number |  | ||||||
|  *                         format: float |  | ||||||
|  *       '400': |  | ||||||
|  *         description: Invalid request parameters |  | ||||||
|  *       '404': |  | ||||||
|  *         description: Note not found |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function findSimilarNotes(req: Request, res: Response) { |  | ||||||
|     const noteId = req.params.noteId; |  | ||||||
|     const providerId = req.query.providerId as string || 'openai'; |  | ||||||
|     const modelId = req.query.modelId as string || 'text-embedding-3-small'; |  | ||||||
|     const limit = parseInt(req.query.limit as string || '10', 10); |  | ||||||
|     const threshold = parseFloat(req.query.threshold as string || '0.7'); |  | ||||||
|  |  | ||||||
|     if (!noteId) { |  | ||||||
|         return [400, { |  | ||||||
|             success: false, |  | ||||||
|             message: "Note ID is required" |  | ||||||
|         }]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const embedding = await vectorStore.getEmbeddingForNote(noteId, providerId, modelId); |  | ||||||
|  |  | ||||||
|     if (!embedding) { |  | ||||||
|         // If no embedding exists for this note yet, generate one |  | ||||||
|         const note = becca.getNote(noteId); |  | ||||||
|         if (!note) { |  | ||||||
|             return [404, { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "Note not found" |  | ||||||
|             }]; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const context = await vectorStore.getNoteEmbeddingContext(noteId); |  | ||||||
|         const provider = providerManager.getEmbeddingProvider(providerId); |  | ||||||
|  |  | ||||||
|         if (!provider) { |  | ||||||
|             return [400, { |  | ||||||
|                 success: false, |  | ||||||
|                 message: `Embedding provider '${providerId}' not found` |  | ||||||
|             }]; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const newEmbedding = await provider.generateNoteEmbeddings(context); |  | ||||||
|         await vectorStore.storeNoteEmbedding(noteId, providerId, modelId, newEmbedding); |  | ||||||
|  |  | ||||||
|         const similarNotes = await vectorStore.findSimilarNotes( |  | ||||||
|             newEmbedding, providerId, modelId, limit, threshold |  | ||||||
|         ); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             similarNotes |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const similarNotes = await vectorStore.findSimilarNotes( |  | ||||||
|         embedding.embedding, providerId, modelId, limit, threshold |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         similarNotes |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/search: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Search for notes similar to provided text |  | ||||||
|  *     operationId: embeddings-search-by-text |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: providerId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         default: openai |  | ||||||
|  *         description: Embedding provider ID |  | ||||||
|  *       - name: modelId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         default: text-embedding-3-small |  | ||||||
|  *         description: Embedding model ID |  | ||||||
|  *       - name: limit |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: integer |  | ||||||
|  *         default: 10 |  | ||||||
|  *         description: Maximum number of similar notes to return |  | ||||||
|  *       - name: threshold |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: number |  | ||||||
|  *           format: float |  | ||||||
|  *         default: 0.7 |  | ||||||
|  *         description: Similarity threshold (0.0-1.0) |  | ||||||
|  *     requestBody: |  | ||||||
|  *       required: true |  | ||||||
|  *       content: |  | ||||||
|  *         application/json: |  | ||||||
|  *           schema: |  | ||||||
|  *             type: object |  | ||||||
|  *             properties: |  | ||||||
|  *               text: |  | ||||||
|  *                 type: string |  | ||||||
|  *                 description: Text to search with |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: List of similar notes |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 similarNotes: |  | ||||||
|  *                   type: array |  | ||||||
|  *                   items: |  | ||||||
|  *                     type: object |  | ||||||
|  *                     properties: |  | ||||||
|  *                       noteId: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       title: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       similarity: |  | ||||||
|  *                         type: number |  | ||||||
|  *                         format: float |  | ||||||
|  *       '400': |  | ||||||
|  *         description: Invalid request parameters |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function searchByText(req: Request, res: Response) { |  | ||||||
|     const { text } = req.body; |  | ||||||
|     const providerId = req.query.providerId as string || 'openai'; |  | ||||||
|     const modelId = req.query.modelId as string || 'text-embedding-3-small'; |  | ||||||
|     const limit = parseInt(req.query.limit as string || '10', 10); |  | ||||||
|     const threshold = parseFloat(req.query.threshold as string || '0.7'); |  | ||||||
|  |  | ||||||
|     if (!text) { |  | ||||||
|         return [400, { |  | ||||||
|             success: false, |  | ||||||
|             message: "Search text is required" |  | ||||||
|         }]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const provider = providerManager.getEmbeddingProvider(providerId); |  | ||||||
|  |  | ||||||
|     if (!provider) { |  | ||||||
|         return [400, { |  | ||||||
|             success: false, |  | ||||||
|             message: `Embedding provider '${providerId}' not found` |  | ||||||
|         }]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Generate embedding for the search text |  | ||||||
|     const embedding = await provider.generateEmbeddings(text); |  | ||||||
|  |  | ||||||
|     // Find similar notes |  | ||||||
|     const similarNotes = await vectorStore.findSimilarNotes( |  | ||||||
|         embedding, providerId, modelId, limit, threshold |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         similarNotes |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/providers: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get available embedding providers |  | ||||||
|  *     operationId: embeddings-get-providers |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: List of available embedding providers |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 providers: |  | ||||||
|  *                   type: array |  | ||||||
|  *                   items: |  | ||||||
|  *                     type: object |  | ||||||
|  *                     properties: |  | ||||||
|  *                       id: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       name: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       isEnabled: |  | ||||||
|  *                         type: boolean |  | ||||||
|  *                       priority: |  | ||||||
|  *                         type: integer |  | ||||||
|  *                       config: |  | ||||||
|  *                         type: object |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getProviders(req: Request, res: Response) { |  | ||||||
|     const providerConfigs = await providerManager.getEmbeddingProviderConfigs(); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         providers: providerConfigs |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/providers/{providerId}: |  | ||||||
|  *   patch: |  | ||||||
|  *     summary: Update embedding provider configuration |  | ||||||
|  *     operationId: embeddings-update-provider |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: providerId |  | ||||||
|  *         in: path |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         description: Provider ID to update |  | ||||||
|  *     requestBody: |  | ||||||
|  *       required: true |  | ||||||
|  *       content: |  | ||||||
|  *         application/json: |  | ||||||
|  *           schema: |  | ||||||
|  *             type: object |  | ||||||
|  *             properties: |  | ||||||
|  *               enabled: |  | ||||||
|  *                 type: boolean |  | ||||||
|  *                 description: Whether provider is enabled |  | ||||||
|  *               priority: |  | ||||||
|  *                 type: integer |  | ||||||
|  *                 description: Priority order (lower is higher priority) |  | ||||||
|  *               config: |  | ||||||
|  *                 type: object |  | ||||||
|  *                 description: Provider-specific configuration |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Provider updated successfully |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *       '400': |  | ||||||
|  *         description: Invalid provider ID or configuration |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function updateProvider(req: Request, res: Response) { |  | ||||||
|     const { providerId } = req.params; |  | ||||||
|     const { isEnabled, priority, config } = req.body; |  | ||||||
|  |  | ||||||
|     const success = await providerManager.updateEmbeddingProviderConfig( |  | ||||||
|         providerId, isEnabled, priority |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     if (!success) { |  | ||||||
|         return [404, { |  | ||||||
|             success: false, |  | ||||||
|             message: "Provider not found" |  | ||||||
|         }]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/reprocess: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Reprocess embeddings for all notes |  | ||||||
|  *     operationId: embeddings-reprocess-all |  | ||||||
|  *     requestBody: |  | ||||||
|  *       required: true |  | ||||||
|  *       content: |  | ||||||
|  *         application/json: |  | ||||||
|  *           schema: |  | ||||||
|  *             type: object |  | ||||||
|  *             properties: |  | ||||||
|  *               providerId: |  | ||||||
|  *                 type: string |  | ||||||
|  *                 description: Provider ID to use for reprocessing |  | ||||||
|  *               modelId: |  | ||||||
|  *                 type: string |  | ||||||
|  *                 description: Model ID to use for reprocessing |  | ||||||
|  *               forceReprocess: |  | ||||||
|  *                 type: boolean |  | ||||||
|  *                 description: Whether to reprocess notes that already have embeddings |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Reprocessing started |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 jobId: |  | ||||||
|  *                   type: string |  | ||||||
|  *                 message: |  | ||||||
|  *                   type: string |  | ||||||
|  *       '400': |  | ||||||
|  *         description: Invalid provider ID or configuration |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function reprocessAllNotes(req: Request, res: Response) { |  | ||||||
|     // Import cls |  | ||||||
|     const cls = (await import("../../services/cls.js")).default; |  | ||||||
|  |  | ||||||
|     // Start the reprocessing operation in the background |  | ||||||
|     setTimeout(async () => { |  | ||||||
|         try { |  | ||||||
|             // Wrap the operation in cls.init to ensure proper context |  | ||||||
|             cls.init(async () => { |  | ||||||
|                 await indexService.reprocessAllNotes(); |  | ||||||
|                 log.info("Embedding reprocessing completed successfully"); |  | ||||||
|             }); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error during background embedding reprocessing: ${error.message || "Unknown error"}`); |  | ||||||
|         } |  | ||||||
|     }, 0); |  | ||||||
|  |  | ||||||
|     // Return the response data |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         message: "Embedding reprocessing started in the background" |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/queue-status: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get status of the embedding processing queue |  | ||||||
|  *     operationId: embeddings-queue-status |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: jobId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         description: Optional job ID to get status for a specific processing job |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Queue status information |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 status: |  | ||||||
|  *                   type: string |  | ||||||
|  *                   enum: [idle, processing, paused] |  | ||||||
|  *                 progress: |  | ||||||
|  *                   type: number |  | ||||||
|  *                   format: float |  | ||||||
|  *                   description: Progress percentage (0-100) |  | ||||||
|  *                 details: |  | ||||||
|  *                   type: object |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getQueueStatus(req: Request, res: Response) { |  | ||||||
|     // Use the imported sql instead of requiring it |  | ||||||
|     const queueCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM embedding_queue" |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     const failedCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM embedding_queue WHERE attempts > 0" |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     const totalEmbeddingsCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM note_embeddings" |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         status: { |  | ||||||
|             queueCount, |  | ||||||
|             failedCount, |  | ||||||
|             totalEmbeddingsCount |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/stats: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get embedding statistics |  | ||||||
|  *     operationId: embeddings-stats |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Embedding statistics |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 stats: |  | ||||||
|  *                   type: object |  | ||||||
|  *                   properties: |  | ||||||
|  *                     totalEmbeddings: |  | ||||||
|  *                       type: integer |  | ||||||
|  *                     providers: |  | ||||||
|  *                       type: object |  | ||||||
|  *                     modelCounts: |  | ||||||
|  *                       type: object |  | ||||||
|  *                     lastUpdated: |  | ||||||
|  *                       type: string |  | ||||||
|  *                       format: date-time |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getEmbeddingStats(req: Request, res: Response) { |  | ||||||
|     const stats = await vectorStore.getEmbeddingStats(); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         stats |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/failed: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get list of notes that failed embedding generation |  | ||||||
|  *     operationId: embeddings-failed-notes |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: List of failed notes |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 failedNotes: |  | ||||||
|  *                   type: array |  | ||||||
|  *                   items: |  | ||||||
|  *                     type: object |  | ||||||
|  *                     properties: |  | ||||||
|  *                       noteId: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       title: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       error: |  | ||||||
|  *                         type: string |  | ||||||
|  *                       failedAt: |  | ||||||
|  *                         type: string |  | ||||||
|  *                         format: date-time |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getFailedNotes(req: Request, res: Response) { |  | ||||||
|     const limit = parseInt(req.query.limit as string || '100', 10); |  | ||||||
|     const failedNotes = await vectorStore.getFailedEmbeddingNotes(limit); |  | ||||||
|  |  | ||||||
|     // No need to fetch note titles here anymore as they're already included in the response |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         failedNotes: failedNotes |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/retry/{noteId}: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Retry generating embeddings for a failed note |  | ||||||
|  *     operationId: embeddings-retry-note |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: noteId |  | ||||||
|  *         in: path |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         description: Note ID to retry |  | ||||||
|  *       - name: providerId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         description: Provider ID to use (defaults to configured default) |  | ||||||
|  *       - name: modelId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         description: Model ID to use (defaults to provider default) |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Retry result |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 message: |  | ||||||
|  *                   type: string |  | ||||||
|  *       '400': |  | ||||||
|  *         description: Invalid request |  | ||||||
|  *       '404': |  | ||||||
|  *         description: Note not found |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function retryFailedNote(req: Request, res: Response) { |  | ||||||
|     const { noteId } = req.params; |  | ||||||
|  |  | ||||||
|     if (!noteId) { |  | ||||||
|         return [400, { |  | ||||||
|             success: false, |  | ||||||
|             message: "Note ID is required" |  | ||||||
|         }]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const success = await vectorStore.retryFailedEmbedding(noteId); |  | ||||||
|  |  | ||||||
|     if (!success) { |  | ||||||
|         return [404, { |  | ||||||
|             success: false, |  | ||||||
|             message: "Failed note not found or note is not marked as failed" |  | ||||||
|         }]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         message: "Note queued for retry" |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/retry-all-failed: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Retry generating embeddings for all failed notes |  | ||||||
|  *     operationId: embeddings-retry-all-failed |  | ||||||
|  *     requestBody: |  | ||||||
|  *       required: false |  | ||||||
|  *       content: |  | ||||||
|  *         application/json: |  | ||||||
|  *           schema: |  | ||||||
|  *             type: object |  | ||||||
|  *             properties: |  | ||||||
|  *               providerId: |  | ||||||
|  *                 type: string |  | ||||||
|  *                 description: Provider ID to use (defaults to configured default) |  | ||||||
|  *               modelId: |  | ||||||
|  *                 type: string |  | ||||||
|  *                 description: Model ID to use (defaults to provider default) |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Retry started |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 message: |  | ||||||
|  *                   type: string |  | ||||||
|  *                 jobId: |  | ||||||
|  *                   type: string |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function retryAllFailedNotes(req: Request, res: Response) { |  | ||||||
|     const count = await vectorStore.retryAllFailedEmbeddings(); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         message: `${count} failed notes queued for retry` |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/rebuild-index: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Rebuild the vector store index |  | ||||||
|  *     operationId: embeddings-rebuild-index |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Rebuild started |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 message: |  | ||||||
|  *                   type: string |  | ||||||
|  *                 jobId: |  | ||||||
|  *                   type: string |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function rebuildIndex(req: Request, res: Response) { |  | ||||||
|     // Start the index rebuilding operation in the background |  | ||||||
|     setTimeout(async () => { |  | ||||||
|         try { |  | ||||||
|             await indexService.startFullIndexing(true); |  | ||||||
|             log.info("Index rebuilding completed successfully"); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error during background index rebuilding: ${error.message || "Unknown error"}`); |  | ||||||
|         } |  | ||||||
|     }, 0); |  | ||||||
|  |  | ||||||
|     // Return the response data |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         message: "Index rebuilding started in the background" |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/embeddings/index-rebuild-status: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get status of the vector index rebuild operation |  | ||||||
|  *     operationId: embeddings-rebuild-status |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: jobId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *         description: Optional job ID to get status for a specific rebuild job |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Rebuild status information |  | ||||||
|  *         content: |  | ||||||
|  *           application/json: |  | ||||||
|  *             schema: |  | ||||||
|  *               type: object |  | ||||||
|  *               properties: |  | ||||||
|  *                 success: |  | ||||||
|  *                   type: boolean |  | ||||||
|  *                 status: |  | ||||||
|  *                   type: string |  | ||||||
|  *                   enum: [idle, in_progress, completed, failed] |  | ||||||
|  *                 progress: |  | ||||||
|  *                   type: number |  | ||||||
|  *                   format: float |  | ||||||
|  *                   description: Progress percentage (0-100) |  | ||||||
|  *                 message: |  | ||||||
|  *                   type: string |  | ||||||
|  *                 details: |  | ||||||
|  *                   type: object |  | ||||||
|  *                   properties: |  | ||||||
|  *                     startTime: |  | ||||||
|  *                       type: string |  | ||||||
|  *                       format: date-time |  | ||||||
|  *                     processed: |  | ||||||
|  *                       type: integer |  | ||||||
|  *                     total: |  | ||||||
|  *                       type: integer |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getIndexRebuildStatus(req: Request, res: Response) { |  | ||||||
|     const status = indexService.getIndexRebuildStatus(); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         success: true, |  | ||||||
|         status |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Start embedding generation when AI is enabled |  | ||||||
|  */ |  | ||||||
| async function startEmbeddings(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         log.info("Starting embedding generation system"); |  | ||||||
|          |  | ||||||
|         // Initialize the index service if not already initialized |  | ||||||
|         await indexService.initialize(); |  | ||||||
|          |  | ||||||
|         // Start automatic indexing |  | ||||||
|         await indexService.startEmbeddingGeneration(); |  | ||||||
|          |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             message: "Embedding generation started" |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error starting embeddings: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to start embeddings: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Stop embedding generation when AI is disabled |  | ||||||
|  */ |  | ||||||
| async function stopEmbeddings(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         log.info("Stopping embedding generation system"); |  | ||||||
|          |  | ||||||
|         // Stop automatic indexing |  | ||||||
|         await indexService.stopEmbeddingGeneration(); |  | ||||||
|          |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             message: "Embedding generation stopped" |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error stopping embeddings: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to stop embeddings: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export default { |  | ||||||
|     findSimilarNotes, |  | ||||||
|     searchByText, |  | ||||||
|     getProviders, |  | ||||||
|     updateProvider, |  | ||||||
|     reprocessAllNotes, |  | ||||||
|     getQueueStatus, |  | ||||||
|     getEmbeddingStats, |  | ||||||
|     getFailedNotes, |  | ||||||
|     retryFailedNote, |  | ||||||
|     retryAllFailedNotes, |  | ||||||
|     rebuildIndex, |  | ||||||
|     getIndexRebuildStatus, |  | ||||||
|     startEmbeddings, |  | ||||||
|     stopEmbeddings |  | ||||||
| }; |  | ||||||
| @@ -2,8 +2,6 @@ import type { Request, Response } from "express"; | |||||||
| import log from "../../services/log.js"; | import log from "../../services/log.js"; | ||||||
| import options from "../../services/options.js"; | import options from "../../services/options.js"; | ||||||
|  |  | ||||||
| // Import the index service for knowledge base management |  | ||||||
| import indexService from "../../services/llm/index_service.js"; |  | ||||||
| import restChatService from "../../services/llm/rest_chat_service.js"; | import restChatService from "../../services/llm/rest_chat_service.js"; | ||||||
| import chatStorageService from '../../services/llm/chat_storage_service.js'; | import chatStorageService from '../../services/llm/chat_storage_service.js'; | ||||||
|  |  | ||||||
| @@ -371,400 +369,13 @@ async function sendMessage(req: Request, res: Response) { | |||||||
|     return restChatService.handleSendMessage(req, res); |     return restChatService.handleSendMessage(req, res); | ||||||
| } | } | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/stats: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get stats about the LLM knowledge base indexing status |  | ||||||
|  *     operationId: llm-index-stats |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Index stats successfully retrieved |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getIndexStats(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Return indexing stats |  | ||||||
|         const stats = await indexService.getIndexingStats(); |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             ...stats |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error getting index stats: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to get index stats: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Start or continue indexing the knowledge base |  | ||||||
|  *     operationId: llm-start-indexing |  | ||||||
|  *     requestBody: |  | ||||||
|  *       required: false |  | ||||||
|  *       content: |  | ||||||
|  *         application/json: |  | ||||||
|  *           schema: |  | ||||||
|  *             type: object |  | ||||||
|  *             properties: |  | ||||||
|  *               force: |  | ||||||
|  *                 type: boolean |  | ||||||
|  *                 description: Whether to force reindexing of all notes |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Indexing started successfully |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function startIndexing(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const { force = false } = req.body; |  | ||||||
|  |  | ||||||
|         // Start indexing |  | ||||||
|         await indexService.startFullIndexing(force); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             message: "Indexing started" |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error starting indexing: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to start indexing: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/failed: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Get list of notes that failed to index |  | ||||||
|  *     operationId: llm-failed-indexes |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: limit |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: integer |  | ||||||
|  *           default: 100 |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Failed indexes successfully retrieved |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function getFailedIndexes(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const limit = parseInt(req.query.limit as string || "100", 10); |  | ||||||
|  |  | ||||||
|         // Get failed indexes |  | ||||||
|         const failed = await indexService.getFailedIndexes(limit); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             failed |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error getting failed indexes: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to get failed indexes: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/notes/{noteId}: |  | ||||||
|  *   put: |  | ||||||
|  *     summary: Retry indexing a specific note that previously failed |  | ||||||
|  *     operationId: llm-retry-index |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: noteId |  | ||||||
|  *         in: path |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Index retry successfully initiated |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function retryFailedIndex(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const { noteId } = req.params; |  | ||||||
|  |  | ||||||
|         // Retry indexing the note |  | ||||||
|         const result = await indexService.retryFailedNote(noteId); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             message: result ? "Note queued for indexing" : "Failed to queue note for indexing" |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error retrying failed index: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to retry index: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/failed: |  | ||||||
|  *   put: |  | ||||||
|  *     summary: Retry indexing all failed notes |  | ||||||
|  *     operationId: llm-retry-all-indexes |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Retry of all failed indexes successfully initiated |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function retryAllFailedIndexes(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Retry all failed notes |  | ||||||
|         const count = await indexService.retryAllFailedNotes(); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             message: `${count} notes queued for reprocessing` |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error retrying all failed indexes: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to retry all indexes: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/notes/similar: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Find notes similar to a query string |  | ||||||
|  *     operationId: llm-find-similar-notes |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: query |  | ||||||
|  *         in: query |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *       - name: contextNoteId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *       - name: limit |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: integer |  | ||||||
|  *           default: 5 |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Similar notes found successfully |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function findSimilarNotes(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const query = req.query.query as string; |  | ||||||
|         const contextNoteId = req.query.contextNoteId as string | undefined; |  | ||||||
|         const limit = parseInt(req.query.limit as string || "5", 10); |  | ||||||
|  |  | ||||||
|         if (!query) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "Query is required" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Find similar notes |  | ||||||
|         const similar = await indexService.findSimilarNotes(query, contextNoteId, limit); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             similar |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error finding similar notes: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to find similar notes: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/context: |  | ||||||
|  *   get: |  | ||||||
|  *     summary: Generate context for an LLM query based on the knowledge base |  | ||||||
|  *     operationId: llm-generate-context |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: query |  | ||||||
|  *         in: query |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *       - name: contextNoteId |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *       - name: depth |  | ||||||
|  *         in: query |  | ||||||
|  *         required: false |  | ||||||
|  *         schema: |  | ||||||
|  *           type: integer |  | ||||||
|  *           default: 2 |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Context generated successfully |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function generateQueryContext(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const query = req.query.query as string; |  | ||||||
|         const contextNoteId = req.query.contextNoteId as string | undefined; |  | ||||||
|         const depth = parseInt(req.query.depth as string || "2", 10); |  | ||||||
|  |  | ||||||
|         if (!query) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "Query is required" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Generate context |  | ||||||
|         const context = await indexService.generateQueryContext(query, contextNoteId, depth); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             context |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error generating query context: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to generate query context: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @swagger |  | ||||||
|  * /api/llm/indexes/notes/{noteId}: |  | ||||||
|  *   post: |  | ||||||
|  *     summary: Index a specific note for LLM knowledge base |  | ||||||
|  *     operationId: llm-index-note |  | ||||||
|  *     parameters: |  | ||||||
|  *       - name: noteId |  | ||||||
|  *         in: path |  | ||||||
|  *         required: true |  | ||||||
|  *         schema: |  | ||||||
|  *           type: string |  | ||||||
|  *     responses: |  | ||||||
|  *       '200': |  | ||||||
|  *         description: Note indexed successfully |  | ||||||
|  *     security: |  | ||||||
|  *       - session: [] |  | ||||||
|  *     tags: ["llm"] |  | ||||||
|  */ |  | ||||||
| async function indexNote(req: Request, res: Response) { |  | ||||||
|     try { |  | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "AI features are disabled" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const { noteId } = req.params; |  | ||||||
|  |  | ||||||
|         if (!noteId) { |  | ||||||
|             return { |  | ||||||
|                 success: false, |  | ||||||
|                 message: "Note ID is required" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Index the note |  | ||||||
|         const result = await indexService.generateNoteIndex(noteId); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             success: true, |  | ||||||
|             message: result ? "Note indexed successfully" : "Failed to index note" |  | ||||||
|         }; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error indexing note: ${error.message || 'Unknown error'}`); |  | ||||||
|         throw new Error(`Failed to index note: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * @swagger |  * @swagger | ||||||
| @@ -936,15 +547,5 @@ export default { | |||||||
|     listSessions, |     listSessions, | ||||||
|     deleteSession, |     deleteSession, | ||||||
|     sendMessage, |     sendMessage, | ||||||
|     streamMessage, |     streamMessage | ||||||
|  |  | ||||||
|     // Knowledge base index management |  | ||||||
|     getIndexStats, |  | ||||||
|     startIndexing, |  | ||||||
|     getFailedIndexes, |  | ||||||
|     retryFailedIndex, |  | ||||||
|     retryAllFailedIndexes, |  | ||||||
|     findSimilarNotes, |  | ||||||
|     generateQueryContext, |  | ||||||
|     indexNote |  | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -92,7 +92,7 @@ const ALLOWED_OPTIONS = new Set<OptionNames>([ | |||||||
|     "showLoginInShareTheme", |     "showLoginInShareTheme", | ||||||
|     "splitEditorOrientation", |     "splitEditorOrientation", | ||||||
|  |  | ||||||
|     // AI/LLM integration options |     // AI/LLM integration options (embedding options removed) | ||||||
|     "aiEnabled", |     "aiEnabled", | ||||||
|     "aiTemperature", |     "aiTemperature", | ||||||
|     "aiSystemPrompt", |     "aiSystemPrompt", | ||||||
| @@ -100,30 +100,11 @@ const ALLOWED_OPTIONS = new Set<OptionNames>([ | |||||||
|     "openaiApiKey", |     "openaiApiKey", | ||||||
|     "openaiBaseUrl", |     "openaiBaseUrl", | ||||||
|     "openaiDefaultModel", |     "openaiDefaultModel", | ||||||
|     "openaiEmbeddingModel", |  | ||||||
|     "openaiEmbeddingApiKey", |  | ||||||
|     "openaiEmbeddingBaseUrl", |  | ||||||
|     "anthropicApiKey", |     "anthropicApiKey", | ||||||
|     "anthropicBaseUrl", |     "anthropicBaseUrl", | ||||||
|     "anthropicDefaultModel", |     "anthropicDefaultModel", | ||||||
|     "voyageApiKey", |  | ||||||
|     "voyageEmbeddingModel", |  | ||||||
|     "voyageEmbeddingBaseUrl", |  | ||||||
|     "ollamaBaseUrl", |     "ollamaBaseUrl", | ||||||
|     "ollamaDefaultModel", |     "ollamaDefaultModel", | ||||||
|     "ollamaEmbeddingModel", |  | ||||||
|     "ollamaEmbeddingBaseUrl", |  | ||||||
|     "embeddingAutoUpdateEnabled", |  | ||||||
|     "embeddingDimensionStrategy", |  | ||||||
|     "embeddingSelectedProvider", |  | ||||||
|     "embeddingSimilarityThreshold", |  | ||||||
|     "embeddingBatchSize", |  | ||||||
|     "embeddingUpdateInterval", |  | ||||||
|     "enableAutomaticIndexing", |  | ||||||
|     "maxNotesPerLlmQuery", |  | ||||||
|  |  | ||||||
|     // Embedding options |  | ||||||
|     "embeddingDefaultDimension", |  | ||||||
|     "mfaEnabled", |     "mfaEnabled", | ||||||
|     "mfaMethod" |     "mfaMethod" | ||||||
| ]); | ]); | ||||||
|   | |||||||
| @@ -54,7 +54,6 @@ import relationMapApiRoute from "./api/relation-map.js"; | |||||||
| import otherRoute from "./api/other.js"; | import otherRoute from "./api/other.js"; | ||||||
| import metricsRoute from "./api/metrics.js"; | import metricsRoute from "./api/metrics.js"; | ||||||
| import shareRoutes from "../share/routes.js"; | import shareRoutes from "../share/routes.js"; | ||||||
| import embeddingsRoute from "./api/embeddings.js"; |  | ||||||
| import ollamaRoute from "./api/ollama.js"; | import ollamaRoute from "./api/ollama.js"; | ||||||
| import openaiRoute from "./api/openai.js"; | import openaiRoute from "./api/openai.js"; | ||||||
| import anthropicRoute from "./api/anthropic.js"; | import anthropicRoute from "./api/anthropic.js"; | ||||||
| @@ -377,31 +376,7 @@ function register(app: express.Application) { | |||||||
|     asyncApiRoute(PST, "/api/llm/chat/:chatNoteId/messages", llmRoute.sendMessage); |     asyncApiRoute(PST, "/api/llm/chat/:chatNoteId/messages", llmRoute.sendMessage); | ||||||
|     asyncApiRoute(PST, "/api/llm/chat/:chatNoteId/messages/stream", llmRoute.streamMessage); |     asyncApiRoute(PST, "/api/llm/chat/:chatNoteId/messages/stream", llmRoute.streamMessage); | ||||||
|  |  | ||||||
|     // LLM index management endpoints - reorganized for REST principles |  | ||||||
|     asyncApiRoute(GET, "/api/llm/indexes/stats", llmRoute.getIndexStats); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/indexes", llmRoute.startIndexing); // Create index process |  | ||||||
|     asyncApiRoute(GET, "/api/llm/indexes/failed", llmRoute.getFailedIndexes); |  | ||||||
|     asyncApiRoute(PUT, "/api/llm/indexes/notes/:noteId", llmRoute.retryFailedIndex); // Update index for note |  | ||||||
|     asyncApiRoute(PUT, "/api/llm/indexes/failed", llmRoute.retryAllFailedIndexes); // Update all failed indexes |  | ||||||
|     asyncApiRoute(GET, "/api/llm/indexes/notes/similar", llmRoute.findSimilarNotes); // Get similar notes |  | ||||||
|     asyncApiRoute(GET, "/api/llm/indexes/context", llmRoute.generateQueryContext); // Get context |  | ||||||
|     asyncApiRoute(PST, "/api/llm/indexes/notes/:noteId", llmRoute.indexNote); // Create index for specific note |  | ||||||
|  |  | ||||||
|     // LLM embeddings endpoints |  | ||||||
|     asyncApiRoute(GET, "/api/llm/embeddings/similar/:noteId", embeddingsRoute.findSimilarNotes); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/search", embeddingsRoute.searchByText); |  | ||||||
|     asyncApiRoute(GET, "/api/llm/embeddings/providers", embeddingsRoute.getProviders); |  | ||||||
|     asyncApiRoute(PATCH, "/api/llm/embeddings/providers/:providerId", embeddingsRoute.updateProvider); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/reprocess", embeddingsRoute.reprocessAllNotes); |  | ||||||
|     asyncApiRoute(GET, "/api/llm/embeddings/queue-status", embeddingsRoute.getQueueStatus); |  | ||||||
|     asyncApiRoute(GET, "/api/llm/embeddings/stats", embeddingsRoute.getEmbeddingStats); |  | ||||||
|     asyncApiRoute(GET, "/api/llm/embeddings/failed", embeddingsRoute.getFailedNotes); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/retry/:noteId", embeddingsRoute.retryFailedNote); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/retry-all-failed", embeddingsRoute.retryAllFailedNotes); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/rebuild-index", embeddingsRoute.rebuildIndex); |  | ||||||
|     asyncApiRoute(GET, "/api/llm/embeddings/index-rebuild-status", embeddingsRoute.getIndexRebuildStatus); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/start", embeddingsRoute.startEmbeddings); |  | ||||||
|     asyncApiRoute(PST, "/api/llm/embeddings/stop", embeddingsRoute.stopEmbeddings); |  | ||||||
|  |  | ||||||
|     // LLM provider endpoints - moved under /api/llm/providers hierarchy |     // LLM provider endpoints - moved under /api/llm/providers hierarchy | ||||||
|     asyncApiRoute(GET, "/api/llm/providers/ollama/models", ollamaRoute.listModels); |     asyncApiRoute(GET, "/api/llm/providers/ollama/models", ollamaRoute.listModels); | ||||||
|   | |||||||
| @@ -799,7 +799,6 @@ class ConsistencyChecks { | |||||||
|         this.runEntityChangeChecks("attributes", "attributeId"); |         this.runEntityChangeChecks("attributes", "attributeId"); | ||||||
|         this.runEntityChangeChecks("etapi_tokens", "etapiTokenId"); |         this.runEntityChangeChecks("etapi_tokens", "etapiTokenId"); | ||||||
|         this.runEntityChangeChecks("options", "name"); |         this.runEntityChangeChecks("options", "name"); | ||||||
|         this.runEntityChangeChecks("note_embeddings", "embedId"); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     findWronglyNamedAttributes() { |     findWronglyNamedAttributes() { | ||||||
|   | |||||||
| @@ -188,7 +188,6 @@ function fillAllEntityChanges() { | |||||||
|         fillEntityChanges("attributes", "attributeId"); |         fillEntityChanges("attributes", "attributeId"); | ||||||
|         fillEntityChanges("etapi_tokens", "etapiTokenId"); |         fillEntityChanges("etapi_tokens", "etapiTokenId"); | ||||||
|         fillEntityChanges("options", "name", "WHERE isSynced = 1"); |         fillEntityChanges("options", "name", "WHERE isSynced = 1"); | ||||||
|         fillEntityChanges("note_embeddings", "embedId"); |  | ||||||
|     }); |     }); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,8 +5,6 @@ import { AnthropicService } from './providers/anthropic_service.js'; | |||||||
| import { ContextExtractor } from './context/index.js'; | import { ContextExtractor } from './context/index.js'; | ||||||
| import agentTools from './context_extractors/index.js'; | import agentTools from './context_extractors/index.js'; | ||||||
| import contextService from './context/services/context_service.js'; | import contextService from './context/services/context_service.js'; | ||||||
| import { getEmbeddingProvider, getEnabledEmbeddingProviders } from './providers/providers.js'; |  | ||||||
| import indexService from './index_service.js'; |  | ||||||
| import log from '../log.js'; | import log from '../log.js'; | ||||||
| import { OllamaService } from './providers/ollama_service.js'; | import { OllamaService } from './providers/ollama_service.js'; | ||||||
| import { OpenAIService } from './providers/openai_service.js'; | import { OpenAIService } from './providers/openai_service.js'; | ||||||
| @@ -22,7 +20,6 @@ import type { NoteSearchResult } from './interfaces/context_interfaces.js'; | |||||||
| // Import new configuration system | // Import new configuration system | ||||||
| import { | import { | ||||||
|     getSelectedProvider, |     getSelectedProvider, | ||||||
|     getSelectedEmbeddingProvider, |  | ||||||
|     parseModelIdentifier, |     parseModelIdentifier, | ||||||
|     isAIEnabled, |     isAIEnabled, | ||||||
|     getDefaultModelForProvider, |     getDefaultModelForProvider, | ||||||
| @@ -307,10 +304,11 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Get the index service for managing knowledge base indexing |      * Get the index service for managing knowledge base indexing | ||||||
|      * @returns The index service instance |      * @returns null since index service has been removed | ||||||
|      */ |      */ | ||||||
|     getIndexService() { |     getIndexService() { | ||||||
|         return indexService; |         log.info('Index service has been removed - returning null'); | ||||||
|  |         return null; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
| @@ -333,10 +331,11 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Get the vector search tool for semantic similarity search |      * Get the vector search tool for semantic similarity search | ||||||
|  |      * Returns null since vector search has been removed | ||||||
|      */ |      */ | ||||||
|     getVectorSearchTool() { |     getVectorSearchTool() { | ||||||
|         const tools = agentTools.getTools(); |         log.info('Vector search has been removed - getVectorSearchTool returning null'); | ||||||
|         return tools.vectorSearch; |         return null; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
| @@ -455,8 +454,7 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|                 return; |                 return; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Initialize index service |             // Index service has been removed - no initialization needed | ||||||
|             await this.getIndexService().initialize(); |  | ||||||
|  |  | ||||||
|             // Tools are already initialized in the constructor |             // Tools are already initialized in the constructor | ||||||
|             // No need to initialize them again |             // No need to initialize them again | ||||||
| @@ -648,7 +646,6 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|             name: provider, |             name: provider, | ||||||
|             capabilities: { |             capabilities: { | ||||||
|                 chat: true, |                 chat: true, | ||||||
|                 embeddings: provider !== 'anthropic', // Anthropic doesn't have embeddings |  | ||||||
|                 streaming: true, |                 streaming: true, | ||||||
|                 functionCalling: provider === 'openai' // Only OpenAI has function calling |                 functionCalling: provider === 'openai' // Only OpenAI has function calling | ||||||
|             }, |             }, | ||||||
| @@ -676,7 +673,6 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|         const aiRelatedOptions = [ |         const aiRelatedOptions = [ | ||||||
|             'aiEnabled', |             'aiEnabled', | ||||||
|             'aiSelectedProvider', |             'aiSelectedProvider', | ||||||
|             'embeddingSelectedProvider', |  | ||||||
|             'openaiApiKey', |             'openaiApiKey', | ||||||
|             'openaiBaseUrl',  |             'openaiBaseUrl',  | ||||||
|             'openaiDefaultModel', |             'openaiDefaultModel', | ||||||
| @@ -684,8 +680,7 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|             'anthropicBaseUrl', |             'anthropicBaseUrl', | ||||||
|             'anthropicDefaultModel', |             'anthropicDefaultModel', | ||||||
|             'ollamaBaseUrl', |             'ollamaBaseUrl', | ||||||
|             'ollamaDefaultModel', |             'ollamaDefaultModel' | ||||||
|             'voyageApiKey' |  | ||||||
|         ]; |         ]; | ||||||
|  |  | ||||||
|         eventService.subscribe(['entityChanged'], async ({ entityName, entity }) => { |         eventService.subscribe(['entityChanged'], async ({ entityName, entity }) => { | ||||||
| @@ -697,15 +692,11 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|                     const isEnabled = entity.value === 'true'; |                     const isEnabled = entity.value === 'true'; | ||||||
|                      |                      | ||||||
|                     if (isEnabled) { |                     if (isEnabled) { | ||||||
|                         log.info('AI features enabled, initializing AI service and embeddings'); |                         log.info('AI features enabled, initializing AI service'); | ||||||
|                         // Initialize the AI service |                         // Initialize the AI service | ||||||
|                         await this.initialize(); |                         await this.initialize(); | ||||||
|                         // Initialize embeddings through index service |  | ||||||
|                         await indexService.startEmbeddingGeneration(); |  | ||||||
|                     } else { |                     } else { | ||||||
|                         log.info('AI features disabled, stopping embeddings and clearing providers'); |                         log.info('AI features disabled, clearing providers'); | ||||||
|                         // Stop embeddings through index service |  | ||||||
|                         await indexService.stopEmbeddingGeneration(); |  | ||||||
|                         // Clear chat providers |                         // Clear chat providers | ||||||
|                         this.services = {}; |                         this.services = {}; | ||||||
|                     } |                     } | ||||||
| @@ -730,10 +721,6 @@ export class AIServiceManager implements IAIServiceManager { | |||||||
|             // Clear existing chat providers (they will be recreated on-demand) |             // Clear existing chat providers (they will be recreated on-demand) | ||||||
|             this.services = {}; |             this.services = {}; | ||||||
|  |  | ||||||
|             // Clear embedding providers (they will be recreated on-demand when needed) |  | ||||||
|             const providerManager = await import('./providers/providers.js'); |  | ||||||
|             providerManager.clearAllEmbeddingProviders(); |  | ||||||
|  |  | ||||||
|             log.info('LLM services recreated successfully'); |             log.info('LLM services recreated successfully'); | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             log.error(`Error recreating LLM services: ${this.handleError(error)}`); |             log.error(`Error recreating LLM services: ${this.handleError(error)}`); | ||||||
| @@ -770,10 +757,6 @@ export default { | |||||||
|     async generateChatCompletion(messages: Message[], options: ChatCompletionOptions = {}): Promise<ChatResponse> { |     async generateChatCompletion(messages: Message[], options: ChatCompletionOptions = {}): Promise<ChatResponse> { | ||||||
|         return getInstance().generateChatCompletion(messages, options); |         return getInstance().generateChatCompletion(messages, options); | ||||||
|     }, |     }, | ||||||
|     // Add validateEmbeddingProviders method |  | ||||||
|     async validateEmbeddingProviders(): Promise<string | null> { |  | ||||||
|         return getInstance().validateConfiguration(); |  | ||||||
|     }, |  | ||||||
|     // Context and index related methods |     // Context and index related methods | ||||||
|     getContextExtractor() { |     getContextExtractor() { | ||||||
|         return getInstance().getContextExtractor(); |         return getInstance().getContextExtractor(); | ||||||
|   | |||||||
| @@ -1,20 +1,19 @@ | |||||||
| /** | /** | ||||||
|  * Handler for LLM context management |  * Handler for LLM context management | ||||||
|  |  * Uses TriliumNext's native search service for powerful note discovery | ||||||
|  */ |  */ | ||||||
| import log from "../../../log.js"; | import log from "../../../log.js"; | ||||||
| import becca from "../../../../becca/becca.js"; | import becca from "../../../../becca/becca.js"; | ||||||
| import vectorStore from "../../embeddings/index.js"; |  | ||||||
| import providerManager from "../../providers/providers.js"; |  | ||||||
| import contextService from "../../context/services/context_service.js"; | import contextService from "../../context/services/context_service.js"; | ||||||
|  | import searchService from "../../../search/services/search.js"; | ||||||
| import type { NoteSource } from "../../interfaces/chat_session.js"; | import type { NoteSource } from "../../interfaces/chat_session.js"; | ||||||
| import { SEARCH_CONSTANTS } from '../../constants/search_constants.js'; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Handles context management for LLM chat |  * Handles context management for LLM chat | ||||||
|  */ |  */ | ||||||
| export class ContextHandler { | export class ContextHandler { | ||||||
|     /** |     /** | ||||||
|      * Find relevant notes based on search query |      * Find relevant notes based on search query using TriliumNext's search service | ||||||
|      * @param content The search content |      * @param content The search content | ||||||
|      * @param contextNoteId Optional note ID for context |      * @param contextNoteId Optional note ID for context | ||||||
|      * @param limit Maximum number of results to return |      * @param limit Maximum number of results to return | ||||||
| @@ -27,106 +26,182 @@ export class ContextHandler { | |||||||
|                 return []; |                 return []; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Check if embeddings are available |             log.info(`Finding relevant notes for query: "${content.substring(0, 50)}..." using TriliumNext search`); | ||||||
|             const enabledProviders = await providerManager.getEnabledEmbeddingProviders(); |  | ||||||
|             if (enabledProviders.length === 0) { |  | ||||||
|                 log.info("No embedding providers available, can't find relevant notes"); |  | ||||||
|                 return []; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get the embedding for the query |             const sources: NoteSource[] = []; | ||||||
|             const provider = enabledProviders[0]; |  | ||||||
|             const embedding = await provider.generateEmbeddings(content); |  | ||||||
|  |  | ||||||
|             let results; |  | ||||||
|             if (contextNoteId) { |             if (contextNoteId) { | ||||||
|                 // For branch context, get notes specifically from that branch |                 // For branch context, get notes specifically from that branch and related notes | ||||||
|                 const contextNote = becca.notes[contextNoteId]; |                 const contextNote = becca.notes[contextNoteId]; | ||||||
|                 if (!contextNote) { |                 if (!contextNote) { | ||||||
|                     return []; |                     return []; | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 const sql = require("../../../../services/sql.js").default; |                 const relevantNotes = this.findNotesInContext(contextNote, content, limit); | ||||||
|                 const childBranches = await sql.getRows(` |                 sources.push(...relevantNotes); | ||||||
|                     SELECT branches.* FROM branches |  | ||||||
|                     WHERE branches.parentNoteId = ? |  | ||||||
|                     AND branches.isDeleted = 0 |  | ||||||
|                 `, [contextNoteId]); |  | ||||||
|  |  | ||||||
|                 const childNoteIds = childBranches.map((branch: any) => branch.noteId); |  | ||||||
|  |  | ||||||
|                 // Include the context note itself |  | ||||||
|                 childNoteIds.push(contextNoteId); |  | ||||||
|  |  | ||||||
|                 // Find similar notes in this context |  | ||||||
|                 results = []; |  | ||||||
|  |  | ||||||
|                 for (const noteId of childNoteIds) { |  | ||||||
|                     const noteEmbedding = await vectorStore.getEmbeddingForNote( |  | ||||||
|                         noteId, |  | ||||||
|                         provider.name, |  | ||||||
|                         provider.getConfig().model |  | ||||||
|                     ); |  | ||||||
|  |  | ||||||
|                     if (noteEmbedding) { |  | ||||||
|                         const similarity = vectorStore.cosineSimilarity( |  | ||||||
|                             embedding, |  | ||||||
|                             noteEmbedding.embedding |  | ||||||
|                         ); |  | ||||||
|  |  | ||||||
|                         if (similarity > SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD) { |  | ||||||
|                             results.push({ |  | ||||||
|                                 noteId, |  | ||||||
|                                 similarity |  | ||||||
|                             }); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Sort by similarity |  | ||||||
|                 results.sort((a, b) => b.similarity - a.similarity); |  | ||||||
|                 results = results.slice(0, limit); |  | ||||||
|             } else { |             } else { | ||||||
|                 // General search across all notes |                 // General search across all notes using TriliumNext's search service | ||||||
|                 results = await vectorStore.findSimilarNotes( |                 const relevantNotes = this.findNotesBySearch(content, limit); | ||||||
|                     embedding, |                 sources.push(...relevantNotes); | ||||||
|                     provider.name, |  | ||||||
|                     provider.getConfig().model, |  | ||||||
|                     limit |  | ||||||
|                 ); |  | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Format the results |             log.info(`Found ${sources.length} relevant notes using TriliumNext search`); | ||||||
|             const sources: NoteSource[] = []; |             return sources.slice(0, limit); | ||||||
|  |  | ||||||
|             for (const result of results) { |  | ||||||
|                 const note = becca.notes[result.noteId]; |  | ||||||
|                 if (!note) continue; |  | ||||||
|  |  | ||||||
|                 let noteContent: string | undefined = undefined; |  | ||||||
|                 if (note.type === 'text') { |  | ||||||
|                     const content = note.getContent(); |  | ||||||
|                     // Handle both string and Buffer types |  | ||||||
|                     noteContent = typeof content === 'string' ? content : |  | ||||||
|                         content instanceof Buffer ? content.toString('utf8') : undefined; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 sources.push({ |  | ||||||
|                     noteId: result.noteId, |  | ||||||
|                     title: note.title, |  | ||||||
|                     content: noteContent, |  | ||||||
|                     similarity: result.similarity, |  | ||||||
|                     branchId: note.getBranches()[0]?.branchId |  | ||||||
|                 }); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return sources; |  | ||||||
|         } catch (error: any) { |         } catch (error: any) { | ||||||
|             log.error(`Error finding relevant notes: ${error.message}`); |             log.error(`Error finding relevant notes: ${error.message}`); | ||||||
|             return []; |             return []; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Find notes in the context of a specific note (children, siblings, linked notes) | ||||||
|  |      */ | ||||||
|  |     private static findNotesInContext(contextNote: any, searchQuery: string, limit: number): NoteSource[] { | ||||||
|  |         const sources: NoteSource[] = []; | ||||||
|  |         const processedNoteIds = new Set<string>(); | ||||||
|  |  | ||||||
|  |         // Add the context note itself (high priority) | ||||||
|  |         sources.push(this.createNoteSource(contextNote, 1.0)); | ||||||
|  |         processedNoteIds.add(contextNote.noteId); | ||||||
|  |  | ||||||
|  |         // Get child notes (search within children) | ||||||
|  |         try { | ||||||
|  |             const childQuery = `note.childOf.noteId = "${contextNote.noteId}" ${searchQuery}`; | ||||||
|  |             const childSearchResults = searchService.searchNotes(childQuery, { includeArchivedNotes: false }); | ||||||
|  |              | ||||||
|  |             for (const childNote of childSearchResults.slice(0, Math.floor(limit / 2))) { | ||||||
|  |                 if (!processedNoteIds.has(childNote.noteId)) { | ||||||
|  |                     sources.push(this.createNoteSource(childNote, 0.8)); | ||||||
|  |                     processedNoteIds.add(childNote.noteId); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } catch (error) { | ||||||
|  |             log.info(`Child search failed, falling back to direct children: ${error}`); | ||||||
|  |             // Fallback to direct child enumeration | ||||||
|  |             const childNotes = contextNote.getChildNotes(); | ||||||
|  |             for (const child of childNotes.slice(0, Math.floor(limit / 2))) { | ||||||
|  |                 if (!processedNoteIds.has(child.noteId) && !child.isDeleted) { | ||||||
|  |                     sources.push(this.createNoteSource(child, 0.8)); | ||||||
|  |                     processedNoteIds.add(child.noteId); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Get related notes (through relations) | ||||||
|  |         const relatedNotes = this.getRelatedNotes(contextNote); | ||||||
|  |         for (const related of relatedNotes.slice(0, Math.floor(limit / 2))) { | ||||||
|  |             if (!processedNoteIds.has(related.noteId) && !related.isDeleted) { | ||||||
|  |                 sources.push(this.createNoteSource(related, 0.6)); | ||||||
|  |                 processedNoteIds.add(related.noteId); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Fill remaining slots with broader search if needed | ||||||
|  |         if (sources.length < limit) { | ||||||
|  |             try { | ||||||
|  |                 const remainingSlots = limit - sources.length; | ||||||
|  |                 const broadSearchResults = searchService.searchNotes(searchQuery, {  | ||||||
|  |                     includeArchivedNotes: false, | ||||||
|  |                     limit: remainingSlots * 2 // Get more to filter out duplicates | ||||||
|  |                 }); | ||||||
|  |                  | ||||||
|  |                 for (const note of broadSearchResults.slice(0, remainingSlots)) { | ||||||
|  |                     if (!processedNoteIds.has(note.noteId)) { | ||||||
|  |                         sources.push(this.createNoteSource(note, 0.5)); | ||||||
|  |                         processedNoteIds.add(note.noteId); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } catch (error) { | ||||||
|  |                 log.error(`Broad search failed: ${error}`); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return sources.slice(0, limit); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Find notes by search across all notes using TriliumNext's search service | ||||||
|  |      */ | ||||||
|  |     private static findNotesBySearch(searchQuery: string, limit: number): NoteSource[] { | ||||||
|  |         try { | ||||||
|  |             log.info(`Performing global search for: "${searchQuery}"`); | ||||||
|  |              | ||||||
|  |             // Use TriliumNext's search service for powerful note discovery | ||||||
|  |             const searchResults = searchService.searchNotes(searchQuery, {  | ||||||
|  |                 includeArchivedNotes: false, | ||||||
|  |                 fastSearch: false // Use full search for better results | ||||||
|  |             }); | ||||||
|  |  | ||||||
|  |             log.info(`Global search found ${searchResults.length} notes`); | ||||||
|  |  | ||||||
|  |             // Convert search results to NoteSource format | ||||||
|  |             const sources: NoteSource[] = []; | ||||||
|  |             const limitedResults = searchResults.slice(0, limit); | ||||||
|  |              | ||||||
|  |             for (let i = 0; i < limitedResults.length; i++) { | ||||||
|  |                 const note = limitedResults[i]; | ||||||
|  |                 // Calculate similarity score based on position (first results are more relevant) | ||||||
|  |                 const similarity = Math.max(0.1, 1.0 - (i / limitedResults.length) * 0.8); | ||||||
|  |                 sources.push(this.createNoteSource(note, similarity)); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             return sources; | ||||||
|  |         } catch (error) { | ||||||
|  |             log.error(`Error in global search: ${error}`); | ||||||
|  |             // Fallback to empty results rather than crashing | ||||||
|  |             return []; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Get notes related through attributes/relations | ||||||
|  |      */ | ||||||
|  |     private static getRelatedNotes(note: any): any[] { | ||||||
|  |         const relatedNotes: any[] = []; | ||||||
|  |          | ||||||
|  |         // Get notes this note points to via relations | ||||||
|  |         const outgoingRelations = note.getOwnedAttributes().filter((attr: any) => attr.type === 'relation'); | ||||||
|  |         for (const relation of outgoingRelations) { | ||||||
|  |             const targetNote = becca.notes[relation.value]; | ||||||
|  |             if (targetNote && !targetNote.isDeleted) { | ||||||
|  |                 relatedNotes.push(targetNote); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Get notes that point to this note via relations | ||||||
|  |         const incomingRelations = note.getTargetRelations(); | ||||||
|  |         for (const relation of incomingRelations) { | ||||||
|  |             const sourceNote = relation.getNote(); | ||||||
|  |             if (sourceNote && !sourceNote.isDeleted) { | ||||||
|  |                 relatedNotes.push(sourceNote); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return relatedNotes; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Create a NoteSource object from a note | ||||||
|  |      */ | ||||||
|  |     private static createNoteSource(note: any, similarity: number): NoteSource { | ||||||
|  |         let noteContent: string | undefined = undefined; | ||||||
|  |         if (note.type === 'text') { | ||||||
|  |             const content = note.getContent(); | ||||||
|  |             // Handle both string and Buffer types | ||||||
|  |             noteContent = typeof content === 'string' ? content : | ||||||
|  |                 content instanceof Buffer ? content.toString('utf8') : undefined; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             noteId: note.noteId, | ||||||
|  |             title: note.title, | ||||||
|  |             content: noteContent, | ||||||
|  |             similarity: similarity, | ||||||
|  |             branchId: note.getBranches()[0]?.branchId | ||||||
|  |         }; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Process enhanced context using the context service |      * Process enhanced context using the context service | ||||||
|      * @param query Query to process |      * @param query Query to process | ||||||
| @@ -165,4 +240,4 @@ export class ContextHandler { | |||||||
|             })) |             })) | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| import configurationManager from './configuration_manager.js'; | import configurationManager from './configuration_manager.js'; | ||||||
| import optionService from '../../options.js'; | import optionService from '../../options.js'; | ||||||
|  | import log from '../../log.js'; | ||||||
| import type { | import type { | ||||||
|     ProviderType, |     ProviderType, | ||||||
|     ModelIdentifier, |     ModelIdentifier, | ||||||
| @@ -19,13 +20,6 @@ export async function getSelectedProvider(): Promise<ProviderType | null> { | |||||||
|     return providerOption as ProviderType || null; |     return providerOption as ProviderType || null; | ||||||
| } | } | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get the selected embedding provider |  | ||||||
|  */ |  | ||||||
| export async function getSelectedEmbeddingProvider(): Promise<string | null> { |  | ||||||
|     const providerOption = optionService.getOption('embeddingSelectedProvider'); |  | ||||||
|     return providerOption || null; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Parse a model identifier (handles "provider:model" format) |  * Parse a model identifier (handles "provider:model" format) | ||||||
|   | |||||||
| @@ -3,11 +3,9 @@ import log from '../../log.js'; | |||||||
| import type { | import type { | ||||||
|     AIConfig, |     AIConfig, | ||||||
|     ProviderPrecedenceConfig, |     ProviderPrecedenceConfig, | ||||||
|     EmbeddingProviderPrecedenceConfig, |  | ||||||
|     ModelIdentifier, |     ModelIdentifier, | ||||||
|     ModelConfig, |     ModelConfig, | ||||||
|     ProviderType, |     ProviderType, | ||||||
|     EmbeddingProviderType, |  | ||||||
|     ConfigValidationResult, |     ConfigValidationResult, | ||||||
|     ProviderSettings, |     ProviderSettings, | ||||||
|     OpenAISettings, |     OpenAISettings, | ||||||
| @@ -51,7 +49,6 @@ export class ConfigurationManager { | |||||||
|             const config: AIConfig = { |             const config: AIConfig = { | ||||||
|                 enabled: await this.getAIEnabled(), |                 enabled: await this.getAIEnabled(), | ||||||
|                 selectedProvider: await this.getSelectedProvider(), |                 selectedProvider: await this.getSelectedProvider(), | ||||||
|                 selectedEmbeddingProvider: await this.getSelectedEmbeddingProvider(), |  | ||||||
|                 defaultModels: await this.getDefaultModels(), |                 defaultModels: await this.getDefaultModels(), | ||||||
|                 providerSettings: await this.getProviderSettings() |                 providerSettings: await this.getProviderSettings() | ||||||
|             }; |             }; | ||||||
| @@ -78,18 +75,6 @@ export class ConfigurationManager { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the selected embedding provider |  | ||||||
|      */ |  | ||||||
|     public async getSelectedEmbeddingProvider(): Promise<EmbeddingProviderType | null> { |  | ||||||
|         try { |  | ||||||
|             const selectedProvider = options.getOption('embeddingSelectedProvider'); |  | ||||||
|             return selectedProvider as EmbeddingProviderType || null; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error getting selected embedding provider: ${error}`); |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Parse model identifier with optional provider prefix |      * Parse model identifier with optional provider prefix | ||||||
| @@ -269,10 +254,6 @@ export class ConfigurationManager { | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Validate selected embedding provider |  | ||||||
|             if (!config.selectedEmbeddingProvider) { |  | ||||||
|                 result.warnings.push('No embedding provider selected'); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             result.errors.push(`Configuration validation error: ${error}`); |             result.errors.push(`Configuration validation error: ${error}`); | ||||||
| @@ -334,7 +315,6 @@ export class ConfigurationManager { | |||||||
|         return { |         return { | ||||||
|             enabled: false, |             enabled: false, | ||||||
|             selectedProvider: null, |             selectedProvider: null, | ||||||
|             selectedEmbeddingProvider: null, |  | ||||||
|             defaultModels: { |             defaultModels: { | ||||||
|                 openai: undefined, |                 openai: undefined, | ||||||
|                 anthropic: undefined, |                 anthropic: undefined, | ||||||
|   | |||||||
| @@ -1,9 +0,0 @@ | |||||||
| export const EMBEDDING_CONSTANTS = { |  | ||||||
|     exactTitleMatch: 0.3, |  | ||||||
|     titleContainsQuery: 0.2, |  | ||||||
|     partialTitleMatch: 0.1, |  | ||||||
|     sameType: 0.05, |  | ||||||
|     attributeMatch: 0.05, |  | ||||||
|     recentlyCreated: 0.05, |  | ||||||
|     recentlyModified: 0.05 |  | ||||||
| }; |  | ||||||
| @@ -1,16 +1,4 @@ | |||||||
| export const SEARCH_CONSTANTS = { | export const SEARCH_CONSTANTS = { | ||||||
|     // Vector search parameters |  | ||||||
|     VECTOR_SEARCH: { |  | ||||||
|         DEFAULT_MAX_RESULTS: 10, |  | ||||||
|         DEFAULT_THRESHOLD: 0.6, |  | ||||||
|         SIMILARITY_THRESHOLD: { |  | ||||||
|             COSINE: 0.6, |  | ||||||
|             HYBRID: 0.3, |  | ||||||
|             DIM_AWARE: 0.1 |  | ||||||
|         }, |  | ||||||
|         EXACT_MATCH_THRESHOLD: 0.65 |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // Context extraction parameters |     // Context extraction parameters | ||||||
|     CONTEXT: { |     CONTEXT: { | ||||||
|         CONTENT_LENGTH: { |         CONTENT_LENGTH: { | ||||||
| @@ -40,7 +28,6 @@ export const SEARCH_CONSTANTS = { | |||||||
|     TEMPERATURE: { |     TEMPERATURE: { | ||||||
|         DEFAULT: 0.7, |         DEFAULT: 0.7, | ||||||
|         RELATIONSHIP_TOOL: 0.4, |         RELATIONSHIP_TOOL: 0.4, | ||||||
|         VECTOR_SEARCH: 0.3, |  | ||||||
|         QUERY_PROCESSOR: 0.3 |         QUERY_PROCESSOR: 0.3 | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
| @@ -49,7 +36,6 @@ export const SEARCH_CONSTANTS = { | |||||||
|         DEFAULT_NOTE_SUMMARY_LENGTH: 500, |         DEFAULT_NOTE_SUMMARY_LENGTH: 500, | ||||||
|         DEFAULT_MAX_TOKENS: 4096, |         DEFAULT_MAX_TOKENS: 4096, | ||||||
|         RELATIONSHIP_TOOL_MAX_TOKENS: 50, |         RELATIONSHIP_TOOL_MAX_TOKENS: 50, | ||||||
|         VECTOR_SEARCH_MAX_TOKENS: 500, |  | ||||||
|         QUERY_PROCESSOR_MAX_TOKENS: 300, |         QUERY_PROCESSOR_MAX_TOKENS: 300, | ||||||
|         MIN_STRING_LENGTH: 3 |         MIN_STRING_LENGTH: 3 | ||||||
|     }, |     }, | ||||||
| @@ -87,51 +73,3 @@ export const MODEL_CAPABILITIES = { | |||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Embedding processing constants |  | ||||||
| export const EMBEDDING_PROCESSING = { |  | ||||||
|     MAX_TOTAL_PROCESSING_TIME: 5 * 60 * 1000, // 5 minutes |  | ||||||
|     MAX_CHUNK_RETRY_ATTEMPTS: 2, |  | ||||||
|     DEFAULT_MAX_CHUNK_PROCESSING_TIME: 60 * 1000, // 1 minute |  | ||||||
|     OLLAMA_MAX_CHUNK_PROCESSING_TIME: 120 * 1000, // 2 minutes |  | ||||||
|     DEFAULT_EMBEDDING_UPDATE_INTERVAL: 200 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Provider-specific embedding capabilities |  | ||||||
| export const PROVIDER_EMBEDDING_CAPABILITIES = { |  | ||||||
|     VOYAGE: { |  | ||||||
|         MODELS: { |  | ||||||
|             'voyage-large-2': { |  | ||||||
|                 contextWidth: 8192, |  | ||||||
|                 dimension: 1536 |  | ||||||
|             }, |  | ||||||
|             'voyage-2': { |  | ||||||
|                 contextWidth: 8192, |  | ||||||
|                 dimension: 1024 |  | ||||||
|             }, |  | ||||||
|             'voyage-lite-02': { |  | ||||||
|                 contextWidth: 8192, |  | ||||||
|                 dimension: 768 |  | ||||||
|             }, |  | ||||||
|             'default': { |  | ||||||
|                 contextWidth: 8192, |  | ||||||
|                 dimension: 1024 |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     }, |  | ||||||
|     OPENAI: { |  | ||||||
|         MODELS: { |  | ||||||
|             'text-embedding-3-small': { |  | ||||||
|                 dimension: 1536, |  | ||||||
|                 contextWindow: 8191 |  | ||||||
|             }, |  | ||||||
|             'text-embedding-3-large': { |  | ||||||
|                 dimension: 3072, |  | ||||||
|                 contextWindow: 8191 |  | ||||||
|             }, |  | ||||||
|             'default': { |  | ||||||
|                 dimension: 1536, |  | ||||||
|                 contextWindow: 8192 |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
|   | |||||||
| @@ -47,7 +47,7 @@ export class ContextFormatter implements IContextFormatter { | |||||||
|             let modelName = providerId; |             let modelName = providerId; | ||||||
|  |  | ||||||
|             // Look up model capabilities |             // Look up model capabilities | ||||||
|             const modelCapabilities = await modelCapabilitiesService.getModelCapabilities(modelName); |             const modelCapabilities = await modelCapabilitiesService.getChatModelCapabilities(modelName); | ||||||
|  |  | ||||||
|             // Calculate available context size for this conversation |             // Calculate available context size for this conversation | ||||||
|             const availableContextSize = calculateAvailableContextSize( |             const availableContextSize = calculateAvailableContextSize( | ||||||
|   | |||||||
| @@ -1,83 +1,37 @@ | |||||||
| import log from '../../../log.js'; | import log from '../../../log.js'; | ||||||
| import { getEmbeddingProvider, getEnabledEmbeddingProviders } from '../../providers/providers.js'; |  | ||||||
| import { getSelectedEmbeddingProvider as getSelectedEmbeddingProviderName } from '../../config/configuration_helpers.js'; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Manages embedding providers for context services |  * Manages embedding providers for context services | ||||||
|  |  * Simplified since embedding functionality has been removed | ||||||
|  */ |  */ | ||||||
| export class ProviderManager { | export class ProviderManager { | ||||||
|     /** |     /** | ||||||
|      * Get the selected embedding provider based on user settings |      * Get the selected embedding provider based on user settings | ||||||
|      * Uses the single provider selection approach |      * Returns null since embeddings have been removed | ||||||
|      * |  | ||||||
|      * @returns The selected embedding provider or null if none available |  | ||||||
|      */ |      */ | ||||||
|     async getSelectedEmbeddingProvider(): Promise<any> { |     async getSelectedEmbeddingProvider(): Promise<null> { | ||||||
|         try { |         log.info('Embedding providers have been removed - returning null'); | ||||||
|             // Get the selected embedding provider |         return null; | ||||||
|             const selectedProvider = await getSelectedEmbeddingProviderName(); |  | ||||||
|              |  | ||||||
|             if (selectedProvider) { |  | ||||||
|                 const provider = await getEmbeddingProvider(selectedProvider); |  | ||||||
|                 if (provider) { |  | ||||||
|                     log.info(`Using selected embedding provider: ${selectedProvider}`); |  | ||||||
|                     return provider; |  | ||||||
|                 } |  | ||||||
|                 log.info(`Selected embedding provider ${selectedProvider} is not available`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // If no provider is selected or available, try any enabled provider |  | ||||||
|             const providers = await getEnabledEmbeddingProviders(); |  | ||||||
|             if (providers.length > 0) { |  | ||||||
|                 log.info(`Using available embedding provider: ${providers[0].name}`); |  | ||||||
|                 return providers[0]; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Last resort is local provider |  | ||||||
|             log.info('Using local embedding provider as fallback'); |  | ||||||
|             return await getEmbeddingProvider('local'); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error getting preferred embedding provider: ${error}`); |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Generate embeddings for a text query |      * Get all enabled embedding providers | ||||||
|      * |      * Returns empty array since embeddings have been removed | ||||||
|      * @param query - The text query to embed |  | ||||||
|      * @returns The generated embedding or null if failed |  | ||||||
|      */ |      */ | ||||||
|     async generateQueryEmbedding(query: string): Promise<Float32Array | null> { |     async getEnabledEmbeddingProviders(): Promise<never[]> { | ||||||
|         try { |         log.info('Embedding providers have been removed - returning empty array'); | ||||||
|             // Get the preferred embedding provider |         return []; | ||||||
|             const provider = await this.getSelectedEmbeddingProvider(); |     } | ||||||
|             if (!provider) { |  | ||||||
|                 log.error('No embedding provider available'); |  | ||||||
|                 return null; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Generate the embedding |     /** | ||||||
|             const embedding = await provider.generateEmbeddings(query); |      * Check if embedding providers are available | ||||||
|  |      * Returns false since embeddings have been removed | ||||||
|             if (embedding) { |      */ | ||||||
|                 // Add the original query as a property to the embedding |     isEmbeddingAvailable(): boolean { | ||||||
|                 // This is used for title matching in the vector search |         return false; | ||||||
|                 Object.defineProperty(embedding, 'originalQuery', { |  | ||||||
|                     value: query, |  | ||||||
|                     writable: false, |  | ||||||
|                     enumerable: true, |  | ||||||
|                     configurable: false |  | ||||||
|                 }); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return embedding; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error generating query embedding: ${error}`); |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| // Export singleton instance | // Export singleton instance | ||||||
| export default new ProviderManager(); | export const providerManager = new ProviderManager(); | ||||||
|  | export default providerManager; | ||||||
| @@ -13,7 +13,6 @@ | |||||||
| import log from '../../../log.js'; | import log from '../../../log.js'; | ||||||
| import providerManager from '../modules/provider_manager.js'; | import providerManager from '../modules/provider_manager.js'; | ||||||
| import cacheManager from '../modules/cache_manager.js'; | import cacheManager from '../modules/cache_manager.js'; | ||||||
| import vectorSearchService from './vector_search_service.js'; |  | ||||||
| import queryProcessor from './query_processor.js'; | import queryProcessor from './query_processor.js'; | ||||||
| import contextFormatter from '../modules/context_formatter.js'; | import contextFormatter from '../modules/context_formatter.js'; | ||||||
| import aiServiceManager from '../../ai_service_manager.js'; | import aiServiceManager from '../../ai_service_manager.js'; | ||||||
| @@ -67,7 +66,7 @@ export class ContextService { | |||||||
|                 // No need to initialize them again |                 // No need to initialize them again | ||||||
|  |  | ||||||
|                 this.initialized = true; |                 this.initialized = true; | ||||||
|                 log.info(`Context service initialized with provider: ${provider.name}`); |                 log.info(`Context service initialized - embeddings disabled`); | ||||||
|             } catch (error: unknown) { |             } catch (error: unknown) { | ||||||
|                 const errorMessage = error instanceof Error ? error.message : String(error); |                 const errorMessage = error instanceof Error ? error.message : String(error); | ||||||
|                 log.error(`Failed to initialize context service: ${errorMessage}`); |                 log.error(`Failed to initialize context service: ${errorMessage}`); | ||||||
| @@ -178,54 +177,46 @@ export class ContextService { | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Step 3: Find relevant notes using vector search |             // Step 3: Find relevant notes using basic text search (since embeddings are removed) | ||||||
|             const allResults = new Map<string, NoteSearchResult>(); |             // This will use traditional note search instead of vector similarity | ||||||
|  |             log.info("Using traditional search instead of embedding-based search"); | ||||||
|             for (const query of searchQueries) { |              | ||||||
|  |             // Use fallback context based on the context note if provided | ||||||
|  |             if (contextNoteId) { | ||||||
|                 try { |                 try { | ||||||
|                     log.info(`Searching for: "${query.substring(0, 50)}..."`); |                     const becca = (await import('../../../../becca/becca.js')).default; | ||||||
|  |                     const contextNote = becca.getNote(contextNoteId); | ||||||
|                     // Use the unified vector search service |                     if (contextNote) { | ||||||
|                     const results = await vectorSearchService.findRelevantNotes( |                         const content = await this.contextExtractor.getNoteContent(contextNoteId); | ||||||
|                         query, |                         relevantNotes = [{ | ||||||
|                         contextNoteId, |                             noteId: contextNoteId, | ||||||
|                         { |                             title: contextNote.title, | ||||||
|                             maxResults: maxResults, |                             similarity: 1.0, | ||||||
|                             summarizeContent: summarizeContent, |                             content: content || "" | ||||||
|                             llmService: summarizeContent ? llmService : null |                         }]; | ||||||
|                         } |                          | ||||||
|                     ); |                         // Add child notes as additional context | ||||||
|  |                         const childNotes = contextNote.getChildNotes().slice(0, maxResults - 1); | ||||||
|                     log.info(`Found ${results.length} results for query "${query.substring(0, 30)}..."`); |                         for (const child of childNotes) { | ||||||
|  |                             const childContent = await this.contextExtractor.getNoteContent(child.noteId); | ||||||
|                     // Combine results, avoiding duplicates |                             relevantNotes.push({ | ||||||
|                     for (const result of results) { |                                 noteId: child.noteId, | ||||||
|                         if (!allResults.has(result.noteId)) { |                                 title: child.title, | ||||||
|                             allResults.set(result.noteId, result); |                                 similarity: 0.8, | ||||||
|                         } else { |                                 content: childContent || "" | ||||||
|                             // If note already exists, update similarity to max of both values |                             }); | ||||||
|                             const existing = allResults.get(result.noteId); |  | ||||||
|                             if (existing && result.similarity > existing.similarity) { |  | ||||||
|                                 existing.similarity = result.similarity; |  | ||||||
|                                 allResults.set(result.noteId, existing); |  | ||||||
|                             } |  | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|                 } catch (error) { |                 } catch (error) { | ||||||
|                     log.error(`Error searching for query "${query}": ${error}`); |                     log.error(`Error accessing context note: ${error}`); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Convert to array and sort by similarity |  | ||||||
|             relevantNotes = Array.from(allResults.values()) |  | ||||||
|                 .sort((a, b) => b.similarity - a.similarity) |  | ||||||
|                 .slice(0, maxResults); |  | ||||||
|  |  | ||||||
|             log.info(`Final combined results: ${relevantNotes.length} relevant notes`); |             log.info(`Final combined results: ${relevantNotes.length} relevant notes`); | ||||||
|  |  | ||||||
|             // Step 4: Build context from the notes |             // Step 4: Build context from the notes | ||||||
|             const provider = await providerManager.getSelectedEmbeddingProvider(); |             const provider = await providerManager.getSelectedEmbeddingProvider(); | ||||||
|             const providerId = provider?.name || 'default'; |             const providerId = 'default'; // Provider is always null since embeddings removed | ||||||
|  |  | ||||||
|             const context = await contextFormatter.buildContextFromNotes( |             const context = await contextFormatter.buildContextFromNotes( | ||||||
|                 relevantNotes, |                 relevantNotes, | ||||||
| @@ -332,15 +323,10 @@ export class ContextService { | |||||||
|             llmService?: LLMServiceInterface | null |             llmService?: LLMServiceInterface | null | ||||||
|         } = {} |         } = {} | ||||||
|     ): Promise<NoteSearchResult[]> { |     ): Promise<NoteSearchResult[]> { | ||||||
|         return vectorSearchService.findRelevantNotes( |         // Vector search has been removed - return empty results | ||||||
|             query, |         // The LLM will rely on tool calls for context gathering | ||||||
|             contextNoteId, |         log.info(`Vector search disabled - findRelevantNotes returning empty results for query: ${query}`); | ||||||
|             { |         return []; | ||||||
|                 maxResults: options.maxResults, |  | ||||||
|                 summarizeContent: options.summarize, |  | ||||||
|                 llmService: options.llmService |  | ||||||
|             } |  | ||||||
|         ); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,23 +5,19 @@ | |||||||
|  * consolidated from previously overlapping implementations: |  * consolidated from previously overlapping implementations: | ||||||
|  * |  * | ||||||
|  * - ContextService: Main entry point for context extraction operations |  * - ContextService: Main entry point for context extraction operations | ||||||
|  * - VectorSearchService: Unified semantic search functionality |  | ||||||
|  * - QueryProcessor: Query enhancement and decomposition |  * - QueryProcessor: Query enhancement and decomposition | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| import contextService from './context_service.js'; | import contextService from './context_service.js'; | ||||||
| import vectorSearchService from './vector_search_service.js'; |  | ||||||
| import queryProcessor from './query_processor.js'; | import queryProcessor from './query_processor.js'; | ||||||
|  |  | ||||||
| export { | export { | ||||||
|   contextService, |   contextService, | ||||||
|   vectorSearchService, |  | ||||||
|   queryProcessor |   queryProcessor | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Export types | // Export types | ||||||
| export type { ContextOptions } from './context_service.js'; | export type { ContextOptions } from './context_service.js'; | ||||||
| export type { VectorSearchOptions } from './vector_search_service.js'; |  | ||||||
| export type { SubQuery, DecomposedQuery } from './query_processor.js'; | export type { SubQuery, DecomposedQuery } from './query_processor.js'; | ||||||
|  |  | ||||||
| // Default export for backwards compatibility | // Default export for backwards compatibility | ||||||
|   | |||||||
| @@ -1,464 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Unified Vector Search Service |  | ||||||
|  * |  | ||||||
|  * Consolidates functionality from: |  | ||||||
|  * - semantic_search.ts |  | ||||||
|  * - vector_search_stage.ts |  | ||||||
|  * |  | ||||||
|  * This service provides a central interface for all vector search operations, |  | ||||||
|  * supporting both full and summarized note context extraction. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import * as vectorStore from '../../embeddings/index.js'; |  | ||||||
| import { cosineSimilarity } from '../../embeddings/index.js'; |  | ||||||
| import log from '../../../log.js'; |  | ||||||
| import becca from '../../../../becca/becca.js'; |  | ||||||
| import providerManager from '../modules/provider_manager.js'; |  | ||||||
| import cacheManager from '../modules/cache_manager.js'; |  | ||||||
| import type { NoteSearchResult } from '../../interfaces/context_interfaces.js'; |  | ||||||
| import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js'; |  | ||||||
| import { SEARCH_CONSTANTS } from '../../constants/search_constants.js'; |  | ||||||
| import { isNoteExcludedFromAI } from '../../utils/ai_exclusion_utils.js'; |  | ||||||
|  |  | ||||||
| export interface VectorSearchOptions { |  | ||||||
|     maxResults?: number; |  | ||||||
|     threshold?: number; |  | ||||||
|     useEnhancedQueries?: boolean; |  | ||||||
|     summarizeContent?: boolean; |  | ||||||
|     llmService?: LLMServiceInterface | null; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export class VectorSearchService { |  | ||||||
|     private contextExtractor: any; |  | ||||||
|  |  | ||||||
|     constructor() { |  | ||||||
|         // Lazy load the context extractor to avoid circular dependencies |  | ||||||
|         import('../index.js').then(module => { |  | ||||||
|             this.contextExtractor = new module.ContextExtractor(); |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Find notes that are semantically relevant to a query |  | ||||||
|      * |  | ||||||
|      * @param query - The search query |  | ||||||
|      * @param contextNoteId - Optional note ID to restrict search to a branch |  | ||||||
|      * @param options - Search options including result limit and summarization preference |  | ||||||
|      * @returns Array of relevant notes with similarity scores |  | ||||||
|      */ |  | ||||||
|     async findRelevantNotes( |  | ||||||
|         query: string, |  | ||||||
|         contextNoteId: string | null = null, |  | ||||||
|         options: VectorSearchOptions = {} |  | ||||||
|     ): Promise<NoteSearchResult[]> { |  | ||||||
|         const { |  | ||||||
|             maxResults = SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS, |  | ||||||
|             threshold = SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_THRESHOLD, |  | ||||||
|             useEnhancedQueries = false, |  | ||||||
|             summarizeContent = false, |  | ||||||
|             llmService = null |  | ||||||
|         } = options; |  | ||||||
|  |  | ||||||
|         log.info(`VectorSearchService: Finding relevant notes for "${query}"`); |  | ||||||
|         log.info(`Parameters: contextNoteId=${contextNoteId || 'global'}, maxResults=${maxResults}, summarize=${summarizeContent}`); |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Check cache first |  | ||||||
|             const cacheKey = `find:${query}:${contextNoteId || 'all'}:${maxResults}:${summarizeContent}`; |  | ||||||
|             const cached = cacheManager.getQueryResults<NoteSearchResult[]>(cacheKey); |  | ||||||
|             if (cached && Array.isArray(cached)) { |  | ||||||
|                 log.info(`VectorSearchService: Returning ${cached.length} cached results`); |  | ||||||
|                 return cached; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get embedding for query |  | ||||||
|             const queryEmbedding = await providerManager.generateQueryEmbedding(query); |  | ||||||
|             if (!queryEmbedding) { |  | ||||||
|                 log.error('Failed to generate query embedding'); |  | ||||||
|                 return []; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get provider information |  | ||||||
|             const provider = await providerManager.getSelectedEmbeddingProvider(); |  | ||||||
|             if (!provider) { |  | ||||||
|                 log.error('No embedding provider available'); |  | ||||||
|                 return []; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Find similar notes based on embeddings |  | ||||||
|             let noteResults: { noteId: string, similarity: number }[] = []; |  | ||||||
|  |  | ||||||
|             // If contextNoteId is provided, search only within that branch |  | ||||||
|             if (contextNoteId) { |  | ||||||
|                 noteResults = await this.findNotesInBranch( |  | ||||||
|                     queryEmbedding, |  | ||||||
|                     contextNoteId, |  | ||||||
|                     maxResults |  | ||||||
|                 ); |  | ||||||
|             } else { |  | ||||||
|                 // Otherwise search across all notes with embeddings |  | ||||||
|                 noteResults = await vectorStore.findSimilarNotes( |  | ||||||
|                     queryEmbedding, |  | ||||||
|                     provider.name, |  | ||||||
|                     provider.getConfig().model || '', |  | ||||||
|                     maxResults |  | ||||||
|                 ); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Ensure context extractor is loaded |  | ||||||
|             if (!this.contextExtractor) { |  | ||||||
|                 const module = await import('../index.js'); |  | ||||||
|                 this.contextExtractor = new module.ContextExtractor(); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get note details for results |  | ||||||
|             const enrichedResults = await Promise.all( |  | ||||||
|                 noteResults.map(async result => { |  | ||||||
|                     const note = becca.getNote(result.noteId); |  | ||||||
|                     if (!note) { |  | ||||||
|                         return null; |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Check if this note is excluded from AI features |  | ||||||
|                     if (isNoteExcludedFromAI(note)) { |  | ||||||
|                         return null; // Skip this note if it has the AI exclusion label |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Get note content - full or summarized based on option |  | ||||||
|                     let content: string | null = null; |  | ||||||
|  |  | ||||||
|                     if (summarizeContent) { |  | ||||||
|                         content = await this.getSummarizedNoteContent(result.noteId, llmService); |  | ||||||
|                     } else { |  | ||||||
|                         content = await this.contextExtractor.getNoteContent(result.noteId); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Adjust similarity score based on content quality |  | ||||||
|                     let adjustedSimilarity = result.similarity; |  | ||||||
|  |  | ||||||
|                     // Penalize notes with empty or minimal content |  | ||||||
|                     if (!content || content.trim().length <= 10) { |  | ||||||
|                         adjustedSimilarity *= 0.2; |  | ||||||
|                     } |  | ||||||
|                     // Slightly boost notes with substantial content |  | ||||||
|                     else if (content.length > 100) { |  | ||||||
|                         adjustedSimilarity = Math.min(1.0, adjustedSimilarity * 1.1); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Get primary parent note ID |  | ||||||
|                     const parentNotes = note.getParentNotes(); |  | ||||||
|                     const parentId = parentNotes.length > 0 ? parentNotes[0].noteId : undefined; |  | ||||||
|  |  | ||||||
|                     // Create parent chain for context |  | ||||||
|                     const parentPath = await this.getParentPath(result.noteId); |  | ||||||
|  |  | ||||||
|                     return { |  | ||||||
|                         noteId: result.noteId, |  | ||||||
|                         title: note.title, |  | ||||||
|                         content, |  | ||||||
|                         similarity: adjustedSimilarity, |  | ||||||
|                         parentId, |  | ||||||
|                         parentPath |  | ||||||
|                     }; |  | ||||||
|                 }) |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             // Filter out null results and notes with very low similarity |  | ||||||
|             const filteredResults = enrichedResults.filter(result => |  | ||||||
|                 result !== null && result.similarity > threshold |  | ||||||
|             ) as NoteSearchResult[]; |  | ||||||
|  |  | ||||||
|             // Sort results by adjusted similarity |  | ||||||
|             filteredResults.sort((a, b) => b.similarity - a.similarity); |  | ||||||
|  |  | ||||||
|             // Limit to requested number of results |  | ||||||
|             const limitedResults = filteredResults.slice(0, maxResults); |  | ||||||
|  |  | ||||||
|             // Cache results |  | ||||||
|             cacheManager.storeQueryResults(cacheKey, limitedResults); |  | ||||||
|  |  | ||||||
|             log.info(`VectorSearchService: Found ${limitedResults.length} relevant notes`); |  | ||||||
|             return limitedResults; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error finding relevant notes: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get a summarized version of note content |  | ||||||
|      * |  | ||||||
|      * @param noteId - The note ID to summarize |  | ||||||
|      * @param llmService - Optional LLM service for summarization |  | ||||||
|      * @returns Summarized content or full content if summarization fails |  | ||||||
|      */ |  | ||||||
|     private async getSummarizedNoteContent( |  | ||||||
|         noteId: string, |  | ||||||
|         llmService: LLMServiceInterface | null |  | ||||||
|     ): Promise<string | null> { |  | ||||||
|         try { |  | ||||||
|             // Get the full content first |  | ||||||
|             const fullContent = await this.contextExtractor.getNoteContent(noteId); |  | ||||||
|             if (!fullContent || fullContent.length < 500) { |  | ||||||
|                 // Don't summarize short content |  | ||||||
|                 return fullContent; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check if we have an LLM service for summarization |  | ||||||
|             if (!llmService) { |  | ||||||
|                 // If no LLM service, truncate the content instead |  | ||||||
|                 return fullContent.substring(0, 500) + "..."; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check cache for summarized content |  | ||||||
|             const cacheKey = `summary:${noteId}:${fullContent.length}`; |  | ||||||
|             const cached = cacheManager.getNoteData(noteId, cacheKey); |  | ||||||
|             if (cached) { |  | ||||||
|                 return cached as string; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const note = becca.getNote(noteId); |  | ||||||
|             if (!note) return null; |  | ||||||
|  |  | ||||||
|             // Prepare a summarization prompt |  | ||||||
|             const messages = [ |  | ||||||
|                 { |  | ||||||
|                     role: "system" as const, |  | ||||||
|                     content: "Summarize the following note content concisely while preserving key information. Keep your summary to about 20% of the original length." |  | ||||||
|                 }, |  | ||||||
|                 { |  | ||||||
|                     role: "user" as const, |  | ||||||
|                     content: `Note title: ${note.title}\n\nContent:\n${fullContent}` |  | ||||||
|                 } |  | ||||||
|             ]; |  | ||||||
|  |  | ||||||
|             // Request summarization with safeguards to prevent recursion |  | ||||||
|             const result = await llmService.generateChatCompletion(messages, { |  | ||||||
|                 temperature: SEARCH_CONSTANTS.TEMPERATURE.VECTOR_SEARCH, |  | ||||||
|                 maxTokens: SEARCH_CONSTANTS.LIMITS.VECTOR_SEARCH_MAX_TOKENS, |  | ||||||
|                 // Use any to bypass type checking for these special options |  | ||||||
|                 // that are recognized by the LLM service but not in the interface |  | ||||||
|                 ...(({ |  | ||||||
|                     bypassFormatter: true, |  | ||||||
|                     bypassContextProcessing: true, |  | ||||||
|                     enableTools: false |  | ||||||
|                 } as any)) |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             const summary = result.text; |  | ||||||
|  |  | ||||||
|             // Cache the summarization result |  | ||||||
|             cacheManager.storeNoteData(noteId, cacheKey, summary); |  | ||||||
|  |  | ||||||
|             return summary; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error summarizing note content: ${error}`); |  | ||||||
|             // Fall back to getting the full content |  | ||||||
|             return this.contextExtractor.getNoteContent(noteId); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Find notes in a specific branch (subtree) that are relevant to a query |  | ||||||
|      * |  | ||||||
|      * @param embedding - The query embedding |  | ||||||
|      * @param contextNoteId - Root note ID of the branch |  | ||||||
|      * @param limit - Maximum results to return |  | ||||||
|      * @returns Array of note IDs with similarity scores |  | ||||||
|      */ |  | ||||||
|     private async findNotesInBranch( |  | ||||||
|         embedding: Float32Array, |  | ||||||
|         contextNoteId: string, |  | ||||||
|         limit = SEARCH_CONSTANTS.CONTEXT.MAX_SIMILAR_NOTES |  | ||||||
|     ): Promise<{ noteId: string, similarity: number }[]> { |  | ||||||
|         try { |  | ||||||
|             // Get all notes in the subtree |  | ||||||
|             const noteIds = await this.getSubtreeNoteIds(contextNoteId); |  | ||||||
|  |  | ||||||
|             if (noteIds.length === 0) { |  | ||||||
|                 return []; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get provider information |  | ||||||
|             const provider = await providerManager.getSelectedEmbeddingProvider(); |  | ||||||
|             if (!provider) { |  | ||||||
|                 log.error('No embedding provider available'); |  | ||||||
|                 return []; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get model configuration |  | ||||||
|             const model = provider.getConfig().model || ''; |  | ||||||
|             const providerName = provider.name; |  | ||||||
|  |  | ||||||
|             // Get embeddings for all notes in the branch |  | ||||||
|             const results: { noteId: string, similarity: number }[] = []; |  | ||||||
|  |  | ||||||
|             for (const noteId of noteIds) { |  | ||||||
|                 try { |  | ||||||
|                     // Check if this note is excluded from AI features |  | ||||||
|                     const note = becca.getNote(noteId); |  | ||||||
|                     if (!note || isNoteExcludedFromAI(note)) { |  | ||||||
|                         continue; // Skip this note if it doesn't exist or has the AI exclusion label |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Get note embedding |  | ||||||
|                     const embeddingResult = await vectorStore.getEmbeddingForNote( |  | ||||||
|                         noteId, |  | ||||||
|                         providerName, |  | ||||||
|                         model |  | ||||||
|                     ); |  | ||||||
|  |  | ||||||
|                     if (embeddingResult && embeddingResult.embedding) { |  | ||||||
|                         // Calculate similarity |  | ||||||
|                         const similarity = cosineSimilarity(embedding, embeddingResult.embedding); |  | ||||||
|                         results.push({ noteId, similarity }); |  | ||||||
|                     } |  | ||||||
|                 } catch (error) { |  | ||||||
|                     log.error(`Error processing note ${noteId} for branch search: ${error}`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Sort by similarity and return top results |  | ||||||
|             return results |  | ||||||
|                 .sort((a, b) => b.similarity - a.similarity) |  | ||||||
|                 .slice(0, limit); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error in branch search: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get all note IDs in a subtree (branch) |  | ||||||
|      * |  | ||||||
|      * @param rootNoteId - The root note ID of the branch |  | ||||||
|      * @returns Array of note IDs in the subtree |  | ||||||
|      */ |  | ||||||
|     private async getSubtreeNoteIds(rootNoteId: string): Promise<string[]> { |  | ||||||
|         try { |  | ||||||
|             const note = becca.getNote(rootNoteId); |  | ||||||
|             if (!note) return []; |  | ||||||
|  |  | ||||||
|             const noteIds = new Set<string>([rootNoteId]); |  | ||||||
|             const processChildNotes = async (noteId: string) => { |  | ||||||
|                 const childNotes = becca.getNote(noteId)?.getChildNotes() || []; |  | ||||||
|                 for (const childNote of childNotes) { |  | ||||||
|                     if (!noteIds.has(childNote.noteId)) { |  | ||||||
|                         noteIds.add(childNote.noteId); |  | ||||||
|                         await processChildNotes(childNote.noteId); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             await processChildNotes(rootNoteId); |  | ||||||
|             return Array.from(noteIds); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error getting subtree note IDs: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the parent path for a note (for additional context) |  | ||||||
|      * |  | ||||||
|      * @param noteId - The note ID to get the parent path for |  | ||||||
|      * @returns String representation of the parent path |  | ||||||
|      */ |  | ||||||
|     private async getParentPath(noteId: string): Promise<string> { |  | ||||||
|         try { |  | ||||||
|             const note = becca.getNote(noteId); |  | ||||||
|             if (!note) return ''; |  | ||||||
|  |  | ||||||
|             const path: string[] = []; |  | ||||||
|             const parentNotes = note.getParentNotes(); |  | ||||||
|             let currentNote = parentNotes.length > 0 ? parentNotes[0] : null; |  | ||||||
|  |  | ||||||
|             // Build path up to the maximum parent depth |  | ||||||
|             let level = 0; |  | ||||||
|             while (currentNote && level < SEARCH_CONSTANTS.CONTEXT.MAX_PARENT_DEPTH) { |  | ||||||
|                 path.unshift(currentNote.title); |  | ||||||
|                 const grandParents = currentNote.getParentNotes(); |  | ||||||
|                 currentNote = grandParents.length > 0 ? grandParents[0] : null; |  | ||||||
|                 level++; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return path.join(' > '); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error getting parent path: ${error}`); |  | ||||||
|             return ''; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Find notes that are semantically relevant to multiple queries |  | ||||||
|      * Combines results from multiple queries, deduplicates them, and returns the most relevant ones |  | ||||||
|      * |  | ||||||
|      * @param queries - Array of search queries |  | ||||||
|      * @param contextNoteId - Optional note ID to restrict search to a branch |  | ||||||
|      * @param options - Search options including result limit and summarization preference |  | ||||||
|      * @returns Array of relevant notes with similarity scores, deduplicated and sorted |  | ||||||
|      */ |  | ||||||
|     async findRelevantNotesMultiQuery( |  | ||||||
|         queries: string[], |  | ||||||
|         contextNoteId: string | null = null, |  | ||||||
|         options: VectorSearchOptions = {} |  | ||||||
|     ): Promise<NoteSearchResult[]> { |  | ||||||
|         if (!queries || queries.length === 0) { |  | ||||||
|             log.info('No queries provided to findRelevantNotesMultiQuery'); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         log.info(`VectorSearchService: Finding relevant notes for ${queries.length} queries`); |  | ||||||
|         log.info(`Multi-query parameters: contextNoteId=${contextNoteId || 'global'}, queries=${JSON.stringify(queries.map(q => q.substring(0, 20) + '...'))}`); |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Create a Map to deduplicate results across queries |  | ||||||
|             const allResults = new Map<string, NoteSearchResult>(); |  | ||||||
|  |  | ||||||
|             // For each query, adjust maxResults to avoid getting too many total results |  | ||||||
|             const adjustedMaxResults = options.maxResults ? |  | ||||||
|                 Math.ceil(options.maxResults / queries.length) : |  | ||||||
|                 Math.ceil(SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS / queries.length); |  | ||||||
|  |  | ||||||
|             // Search for each query and combine results |  | ||||||
|             for (const query of queries) { |  | ||||||
|                 try { |  | ||||||
|                     const queryOptions = { |  | ||||||
|                         ...options, |  | ||||||
|                         maxResults: adjustedMaxResults, |  | ||||||
|                         useEnhancedQueries: false // We're already using enhanced queries |  | ||||||
|                     }; |  | ||||||
|  |  | ||||||
|                     const results = await this.findRelevantNotes(query, contextNoteId, queryOptions); |  | ||||||
|  |  | ||||||
|                     // Merge results, keeping the highest similarity score for duplicates |  | ||||||
|                     for (const note of results) { |  | ||||||
|                         if (!allResults.has(note.noteId) || |  | ||||||
|                             (allResults.has(note.noteId) && note.similarity > (allResults.get(note.noteId)?.similarity || 0))) { |  | ||||||
|                             allResults.set(note.noteId, note); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     log.info(`Found ${results.length} results for query: "${query.substring(0, 30)}..."`); |  | ||||||
|                 } catch (error) { |  | ||||||
|                     log.error(`Error searching for query "${query}": ${error}`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Convert map to array and sort by similarity |  | ||||||
|             const combinedResults = Array.from(allResults.values()) |  | ||||||
|                 .sort((a, b) => b.similarity - a.similarity) |  | ||||||
|                 .slice(0, options.maxResults || SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS); |  | ||||||
|  |  | ||||||
|             log.info(`VectorSearchService: Found ${combinedResults.length} total deduplicated results across ${queries.length} queries`); |  | ||||||
|  |  | ||||||
|             return combinedResults; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error in findRelevantNotesMultiQuery: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Export a singleton instance |  | ||||||
| export default new VectorSearchService(); |  | ||||||
| @@ -7,7 +7,6 @@ | |||||||
| import { ContextualThinkingTool } from './contextual_thinking_tool.js'; | import { ContextualThinkingTool } from './contextual_thinking_tool.js'; | ||||||
| import { NoteNavigatorTool } from './note_navigator_tool.js'; | import { NoteNavigatorTool } from './note_navigator_tool.js'; | ||||||
| import { QueryDecompositionTool } from './query_decomposition_tool.js'; | import { QueryDecompositionTool } from './query_decomposition_tool.js'; | ||||||
| import { VectorSearchTool } from './vector_search_tool.js'; |  | ||||||
|  |  | ||||||
| // Import services needed for initialization | // Import services needed for initialization | ||||||
| import contextService from '../context/services/context_service.js'; | import contextService from '../context/services/context_service.js'; | ||||||
| @@ -17,8 +16,7 @@ import log from '../../log.js'; | |||||||
| import type { | import type { | ||||||
|   IContextualThinkingTool, |   IContextualThinkingTool, | ||||||
|   INoteNavigatorTool, |   INoteNavigatorTool, | ||||||
|   IQueryDecompositionTool, |   IQueryDecompositionTool | ||||||
|   IVectorSearchTool |  | ||||||
| } from '../interfaces/agent_tool_interfaces.js'; | } from '../interfaces/agent_tool_interfaces.js'; | ||||||
|  |  | ||||||
| /** | /** | ||||||
| @@ -27,7 +25,6 @@ import type { | |||||||
|  * Manages and provides access to all available agent tools. |  * Manages and provides access to all available agent tools. | ||||||
|  */ |  */ | ||||||
| export class AgentToolsManager { | export class AgentToolsManager { | ||||||
|   private vectorSearchTool: VectorSearchTool | null = null; |  | ||||||
|   private noteNavigatorTool: NoteNavigatorTool | null = null; |   private noteNavigatorTool: NoteNavigatorTool | null = null; | ||||||
|   private queryDecompositionTool: QueryDecompositionTool | null = null; |   private queryDecompositionTool: QueryDecompositionTool | null = null; | ||||||
|   private contextualThinkingTool: ContextualThinkingTool | null = null; |   private contextualThinkingTool: ContextualThinkingTool | null = null; | ||||||
| @@ -52,16 +49,10 @@ export class AgentToolsManager { | |||||||
|       } |       } | ||||||
|  |  | ||||||
|       // Create tool instances |       // Create tool instances | ||||||
|       this.vectorSearchTool = new VectorSearchTool(); |  | ||||||
|       this.noteNavigatorTool = new NoteNavigatorTool(); |       this.noteNavigatorTool = new NoteNavigatorTool(); | ||||||
|       this.queryDecompositionTool = new QueryDecompositionTool(); |       this.queryDecompositionTool = new QueryDecompositionTool(); | ||||||
|       this.contextualThinkingTool = new ContextualThinkingTool(); |       this.contextualThinkingTool = new ContextualThinkingTool(); | ||||||
|  |  | ||||||
|       // Set context service in the vector search tool |  | ||||||
|       if (this.vectorSearchTool) { |  | ||||||
|         this.vectorSearchTool.setContextService(contextService); |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       this.initialized = true; |       this.initialized = true; | ||||||
|       log.info("Agent tools initialized successfully"); |       log.info("Agent tools initialized successfully"); | ||||||
|     } catch (error) { |     } catch (error) { | ||||||
| @@ -75,11 +66,6 @@ export class AgentToolsManager { | |||||||
|    */ |    */ | ||||||
|   getAllTools() { |   getAllTools() { | ||||||
|     return [ |     return [ | ||||||
|       { |  | ||||||
|         name: "vector_search", |  | ||||||
|         description: "Searches your notes for semantically similar content", |  | ||||||
|         function: this.vectorSearchTool?.search.bind(this.vectorSearchTool) |  | ||||||
|       }, |  | ||||||
|       { |       { | ||||||
|         name: "navigate_to_note", |         name: "navigate_to_note", | ||||||
|         description: "Navigates to a specific note", |         description: "Navigates to a specific note", | ||||||
| @@ -103,7 +89,6 @@ export class AgentToolsManager { | |||||||
|    */ |    */ | ||||||
|   getTools() { |   getTools() { | ||||||
|     return { |     return { | ||||||
|       vectorSearch: this.vectorSearchTool as IVectorSearchTool, |  | ||||||
|       noteNavigator: this.noteNavigatorTool as INoteNavigatorTool, |       noteNavigator: this.noteNavigatorTool as INoteNavigatorTool, | ||||||
|       queryDecomposition: this.queryDecompositionTool as IQueryDecompositionTool, |       queryDecomposition: this.queryDecompositionTool as IQueryDecompositionTool, | ||||||
|       contextualThinking: this.contextualThinkingTool as IContextualThinkingTool |       contextualThinking: this.contextualThinkingTool as IContextualThinkingTool | ||||||
| @@ -117,7 +102,6 @@ export default agentTools; | |||||||
|  |  | ||||||
| // Export all tools for direct import if needed | // Export all tools for direct import if needed | ||||||
| export { | export { | ||||||
|   VectorSearchTool, |  | ||||||
|   NoteNavigatorTool, |   NoteNavigatorTool, | ||||||
|   QueryDecompositionTool, |   QueryDecompositionTool, | ||||||
|   ContextualThinkingTool |   ContextualThinkingTool | ||||||
|   | |||||||
| @@ -1,218 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Vector Search Tool |  | ||||||
|  * |  | ||||||
|  * This tool enables the LLM agent to perform semantic vector-based searches |  | ||||||
|  * over the content in the notes database. It handles: |  | ||||||
|  * - Finding semantically related notes to a query |  | ||||||
|  * - Extracting relevant sections from notes |  | ||||||
|  * - Providing relevant context for LLM to generate accurate responses |  | ||||||
|  * |  | ||||||
|  * Updated to use the consolidated VectorSearchService |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import log from '../../log.js'; |  | ||||||
| import type { ContextService } from '../context/services/context_service.js'; |  | ||||||
| import vectorSearchService from '../context/services/vector_search_service.js'; |  | ||||||
|  |  | ||||||
| export interface VectorSearchResult { |  | ||||||
|     noteId: string; |  | ||||||
|     title: string; |  | ||||||
|     contentPreview: string; |  | ||||||
|     similarity: number; |  | ||||||
|     parentId?: string; |  | ||||||
|     dateCreated?: string; |  | ||||||
|     dateModified?: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export interface SearchResultItem { |  | ||||||
|     noteId: string; |  | ||||||
|     noteTitle: string; |  | ||||||
|     contentPreview: string; |  | ||||||
|     similarity: number; |  | ||||||
|     parentId?: string; |  | ||||||
|     dateCreated?: string; |  | ||||||
|     dateModified?: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export interface VectorSearchOptions { |  | ||||||
|     limit?: number; |  | ||||||
|     threshold?: number; |  | ||||||
|     includeContent?: boolean; |  | ||||||
|     summarize?: boolean; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Define a type for the context service |  | ||||||
| export interface IVectorContextService { |  | ||||||
|     findRelevantNotes?: (query: string, contextNoteId: string | null, options: Record<string, unknown>) => Promise<unknown[]>; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export class VectorSearchTool { |  | ||||||
|     private contextService: IVectorContextService | null = null; |  | ||||||
|     private maxResults: number = 5; |  | ||||||
|  |  | ||||||
|     constructor() { |  | ||||||
|         log.info('VectorSearchTool initialized using consolidated VectorSearchService'); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Set the context service for performing vector searches |  | ||||||
|      */ |  | ||||||
|     setContextService(contextService: IVectorContextService): void { |  | ||||||
|         this.contextService = contextService; |  | ||||||
|         log.info('Context service set in VectorSearchTool'); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Perform a vector search for related notes |  | ||||||
|      */ |  | ||||||
|     async search( |  | ||||||
|         query: string, |  | ||||||
|         contextNoteId?: string, |  | ||||||
|         searchOptions: VectorSearchOptions = {} |  | ||||||
|     ): Promise<VectorSearchResult[]> { |  | ||||||
|         try { |  | ||||||
|             // Set more aggressive defaults to return more content |  | ||||||
|             const options = { |  | ||||||
|                 maxResults: searchOptions.limit || 15, // Increased from default |  | ||||||
|                 threshold: searchOptions.threshold || 0.5, // Lower threshold to include more results |  | ||||||
|                 includeContent: searchOptions.includeContent !== undefined ? searchOptions.includeContent : true, |  | ||||||
|                 summarizeContent: searchOptions.summarize || false, |  | ||||||
|                 ...searchOptions |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             log.info(`Vector search: "${query.substring(0, 50)}..." with limit=${options.maxResults}, threshold=${options.threshold}`); |  | ||||||
|  |  | ||||||
|             // Use the consolidated vector search service |  | ||||||
|             const searchResults = await vectorSearchService.findRelevantNotes( |  | ||||||
|                 query, |  | ||||||
|                 contextNoteId || null, |  | ||||||
|                 { |  | ||||||
|                     maxResults: options.maxResults, |  | ||||||
|                     threshold: options.threshold, |  | ||||||
|                     summarizeContent: options.summarizeContent |  | ||||||
|                 } |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             log.info(`Vector search found ${searchResults.length} relevant notes`); |  | ||||||
|  |  | ||||||
|             // Format results to match the expected VectorSearchResult interface |  | ||||||
|             return searchResults.map(note => ({ |  | ||||||
|                 noteId: note.noteId, |  | ||||||
|                 title: note.title, |  | ||||||
|                 contentPreview: note.content |  | ||||||
|                     ? (options.summarizeContent |  | ||||||
|                         // Don't truncate already summarized content |  | ||||||
|                         ? note.content |  | ||||||
|                         // Only truncate non-summarized content |  | ||||||
|                         : (note.content.length > 200 |  | ||||||
|                             ? note.content.substring(0, 200) + '...' |  | ||||||
|                             : note.content)) |  | ||||||
|                     : 'No content available', |  | ||||||
|                 similarity: note.similarity, |  | ||||||
|                 parentId: note.parentId |  | ||||||
|             })); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Vector search error: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Search for notes that are semantically related to the query |  | ||||||
|      */ |  | ||||||
|     async searchNotes(query: string, options: { |  | ||||||
|         parentNoteId?: string, |  | ||||||
|         maxResults?: number, |  | ||||||
|         similarityThreshold?: number, |  | ||||||
|         summarize?: boolean |  | ||||||
|     } = {}): Promise<VectorSearchResult[]> { |  | ||||||
|         try { |  | ||||||
|             // Set defaults |  | ||||||
|             const maxResults = options.maxResults || this.maxResults; |  | ||||||
|             const threshold = options.similarityThreshold || 0.6; |  | ||||||
|             const parentNoteId = options.parentNoteId || null; |  | ||||||
|             const summarize = options.summarize || false; |  | ||||||
|  |  | ||||||
|             // Use the consolidated vector search service |  | ||||||
|             const results = await vectorSearchService.findRelevantNotes( |  | ||||||
|                 query, |  | ||||||
|                 parentNoteId, |  | ||||||
|                 { |  | ||||||
|                     maxResults, |  | ||||||
|                     threshold, |  | ||||||
|                     summarizeContent: summarize |  | ||||||
|                 } |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             // Format results to match the expected interface |  | ||||||
|             return results.map(result => ({ |  | ||||||
|                 noteId: result.noteId, |  | ||||||
|                 title: result.title, |  | ||||||
|                 contentPreview: result.content |  | ||||||
|                     ? (summarize |  | ||||||
|                         // Don't truncate already summarized content |  | ||||||
|                         ? result.content |  | ||||||
|                         // Only truncate non-summarized content |  | ||||||
|                         : (result.content.length > 200 |  | ||||||
|                             ? result.content.substring(0, 200) + '...' |  | ||||||
|                             : result.content)) |  | ||||||
|                     : 'No content available', |  | ||||||
|                 similarity: result.similarity, |  | ||||||
|                 parentId: result.parentId |  | ||||||
|             })); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error in vector search: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Search for content chunks that are semantically related to the query |  | ||||||
|      */ |  | ||||||
|     async searchContentChunks(query: string, options: { |  | ||||||
|         noteId?: string, |  | ||||||
|         maxResults?: number, |  | ||||||
|         similarityThreshold?: number, |  | ||||||
|         summarize?: boolean |  | ||||||
|     } = {}): Promise<VectorSearchResult[]> { |  | ||||||
|         try { |  | ||||||
|             // For now, use the same implementation as searchNotes, |  | ||||||
|             // but in the future we'll implement chunk-based search |  | ||||||
|             return this.searchNotes(query, { |  | ||||||
|                 parentNoteId: options.noteId, |  | ||||||
|                 maxResults: options.maxResults, |  | ||||||
|                 similarityThreshold: options.similarityThreshold, |  | ||||||
|                 summarize: options.summarize |  | ||||||
|             }); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error in vector chunk search: ${error}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Elaborate on why certain results were returned for a query |  | ||||||
|      */ |  | ||||||
|     explainResults(query: string, results: VectorSearchResult[]): string { |  | ||||||
|         if (!query || !results || results.length === 0) { |  | ||||||
|             return "No results to explain."; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         let explanation = `For query "${query}", I found these semantically related notes:\n\n`; |  | ||||||
|  |  | ||||||
|         results.forEach((result, index) => { |  | ||||||
|             explanation += `${index + 1}. "${result.title}" (similarity: ${(result.similarity * 100).toFixed(1)}%)\n`; |  | ||||||
|             explanation += `   Preview: ${result.contentPreview.substring(0, 150)}...\n`; |  | ||||||
|  |  | ||||||
|             if (index < results.length - 1) { |  | ||||||
|                 explanation += "\n"; |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         explanation += "\nThese results were found based on semantic similarity rather than just keyword matching."; |  | ||||||
|  |  | ||||||
|         return explanation; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export default new VectorSearchTool(); |  | ||||||
| @@ -1,438 +0,0 @@ | |||||||
| import { NormalizationStatus } from './embeddings_interface.js'; |  | ||||||
| import type { NoteEmbeddingContext } from './embeddings_interface.js'; |  | ||||||
| import log from "../../log.js"; |  | ||||||
| import { LLM_CONSTANTS } from "../constants/provider_constants.js"; |  | ||||||
| import options from "../../options.js"; |  | ||||||
| import { isBatchSizeError as checkBatchSizeError } from '../interfaces/error_interfaces.js'; |  | ||||||
| import type { EmbeddingModelInfo } from '../interfaces/embedding_interfaces.js'; |  | ||||||
|  |  | ||||||
| export interface EmbeddingConfig { |  | ||||||
|     model: string; |  | ||||||
|     dimension: number; |  | ||||||
|     type: 'float32' | 'float64'; |  | ||||||
|     apiKey?: string; |  | ||||||
|     baseUrl?: string; |  | ||||||
|     batchSize?: number; |  | ||||||
|     contextWidth?: number; |  | ||||||
|     normalizationStatus?: NormalizationStatus; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Base class for embedding providers that implements common functionality |  | ||||||
|  */ |  | ||||||
| export abstract class BaseEmbeddingProvider { |  | ||||||
|     protected model: string; |  | ||||||
|     protected dimension: number; |  | ||||||
|     protected type: 'float32' | 'float64'; |  | ||||||
|     protected maxBatchSize: number = 100; |  | ||||||
|     protected apiKey?: string; |  | ||||||
|     protected baseUrl: string; |  | ||||||
|     protected name: string = 'base'; |  | ||||||
|     protected modelInfoCache = new Map<string, EmbeddingModelInfo>(); |  | ||||||
|     protected config: EmbeddingConfig; |  | ||||||
|  |  | ||||||
|     constructor(config: EmbeddingConfig) { |  | ||||||
|         this.model = config.model; |  | ||||||
|         this.dimension = config.dimension; |  | ||||||
|         this.type = config.type; |  | ||||||
|         this.apiKey = config.apiKey; |  | ||||||
|         this.baseUrl = config.baseUrl || ''; |  | ||||||
|         this.config = config; |  | ||||||
|  |  | ||||||
|         // If batch size is specified, use it as maxBatchSize |  | ||||||
|         if (config.batchSize) { |  | ||||||
|             this.maxBatchSize = config.batchSize; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     getConfig(): EmbeddingConfig { |  | ||||||
|         return { ...this.config }; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the normalization status of this provider |  | ||||||
|      * Default implementation returns the status from config if available, |  | ||||||
|      * otherwise returns UNKNOWN status |  | ||||||
|      */ |  | ||||||
|     getNormalizationStatus(): NormalizationStatus { |  | ||||||
|         return this.config.normalizationStatus || NormalizationStatus.UNKNOWN; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     getDimension(): number { |  | ||||||
|         return this.config.dimension; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     async initialize(): Promise<void> { |  | ||||||
|         // Default implementation does nothing |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a single text |  | ||||||
|      */ |  | ||||||
|     abstract generateEmbeddings(text: string): Promise<Float32Array>; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the appropriate batch size for this provider |  | ||||||
|      * Override in provider implementations if needed |  | ||||||
|      */ |  | ||||||
|     protected async getBatchSize(): Promise<number> { |  | ||||||
|         // Try to get the user-configured batch size |  | ||||||
|         let configuredBatchSize: number | null = null; |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const batchSizeStr = await options.getOption('embeddingBatchSize'); |  | ||||||
|             if (batchSizeStr) { |  | ||||||
|                 configuredBatchSize = parseInt(batchSizeStr, 10); |  | ||||||
|             } |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error getting batch size from options: ${error}`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If user has configured a specific batch size, use that |  | ||||||
|         if (configuredBatchSize && !isNaN(configuredBatchSize) && configuredBatchSize > 0) { |  | ||||||
|             return configuredBatchSize; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Otherwise use the provider-specific default from constants |  | ||||||
|         return this.config.batchSize || |  | ||||||
|                LLM_CONSTANTS.BATCH_SIZE[this.name.toUpperCase() as keyof typeof LLM_CONSTANTS.BATCH_SIZE] || |  | ||||||
|                LLM_CONSTANTS.BATCH_SIZE.DEFAULT; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Process a batch of texts with adaptive handling |  | ||||||
|      * This method will try to process the batch and reduce batch size if encountering errors |  | ||||||
|      */ |  | ||||||
|     protected async processWithAdaptiveBatch<T, R>( |  | ||||||
|         items: T[], |  | ||||||
|         processFn: (batch: T[]) => Promise<R[]>, |  | ||||||
|         isBatchSizeError: (error: unknown) => boolean |  | ||||||
|     ): Promise<R[]> { |  | ||||||
|         const results: R[] = []; |  | ||||||
|         const failures: { index: number, error: string }[] = []; |  | ||||||
|         let currentBatchSize = await this.getBatchSize(); |  | ||||||
|         let lastError: Error | null = null; |  | ||||||
|  |  | ||||||
|         // Process items in batches |  | ||||||
|         for (let i = 0; i < items.length;) { |  | ||||||
|             const batch = items.slice(i, i + currentBatchSize); |  | ||||||
|  |  | ||||||
|             try { |  | ||||||
|                 // Process the current batch |  | ||||||
|                 const batchResults = await processFn(batch); |  | ||||||
|                 results.push(...batchResults); |  | ||||||
|                 i += batch.length; |  | ||||||
|             } |  | ||||||
|             catch (error) { |  | ||||||
|                 lastError = error as Error; |  | ||||||
|                 const errorMessage = (lastError as Error).message || 'Unknown error'; |  | ||||||
|  |  | ||||||
|                 // Check if this is a batch size related error |  | ||||||
|                 if (isBatchSizeError(error) && currentBatchSize > 1) { |  | ||||||
|                     // Reduce batch size and retry |  | ||||||
|                     const newBatchSize = Math.max(1, Math.floor(currentBatchSize / 2)); |  | ||||||
|                     console.warn(`Batch size error detected, reducing batch size from ${currentBatchSize} to ${newBatchSize}: ${errorMessage}`); |  | ||||||
|                     currentBatchSize = newBatchSize; |  | ||||||
|                 } |  | ||||||
|                 else if (currentBatchSize === 1) { |  | ||||||
|                     // If we're already at batch size 1, we can't reduce further, so log the error and skip this item |  | ||||||
|                     log.error(`Error processing item at index ${i} with batch size 1: ${errorMessage}`); |  | ||||||
|                     failures.push({ index: i, error: errorMessage }); |  | ||||||
|                     i++; // Move to the next item |  | ||||||
|                 } |  | ||||||
|                 else { |  | ||||||
|                     // For other errors, retry with a smaller batch size as a precaution |  | ||||||
|                     const newBatchSize = Math.max(1, Math.floor(currentBatchSize / 2)); |  | ||||||
|                     console.warn(`Error processing batch, reducing batch size from ${currentBatchSize} to ${newBatchSize} as a precaution: ${errorMessage}`); |  | ||||||
|                     currentBatchSize = newBatchSize; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If all items failed and we have a last error, throw it |  | ||||||
|         if (results.length === 0 && failures.length > 0 && lastError) { |  | ||||||
|             throw lastError; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If some items failed but others succeeded, log the summary |  | ||||||
|         if (failures.length > 0) { |  | ||||||
|             console.warn(`Processed ${results.length} items successfully, but ${failures.length} items failed`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Detect if an error is related to batch size limits |  | ||||||
|      * Override in provider-specific implementations |  | ||||||
|      */ |  | ||||||
|     protected isBatchSizeError(error: unknown): boolean { |  | ||||||
|         return checkBatchSizeError(error); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple texts |  | ||||||
|      * Default implementation processes texts one by one |  | ||||||
|      */ |  | ||||||
|     async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         if (texts.length === 0) { |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await this.processWithAdaptiveBatch( |  | ||||||
|                 texts, |  | ||||||
|                 async (batch) => { |  | ||||||
|                     const batchResults = await Promise.all( |  | ||||||
|                         batch.map(text => this.generateEmbeddings(text)) |  | ||||||
|                     ); |  | ||||||
|                     return batchResults; |  | ||||||
|                 }, |  | ||||||
|                 this.isBatchSizeError.bind(this) |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|         catch (error) { |  | ||||||
|             const errorMessage = (error as Error).message || "Unknown error"; |  | ||||||
|             log.error(`Batch embedding error for provider ${this.name}: ${errorMessage}`); |  | ||||||
|             throw new Error(`${this.name} batch embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a note with its context |  | ||||||
|      */ |  | ||||||
|     async generateNoteEmbeddings(context: NoteEmbeddingContext): Promise<Float32Array> { |  | ||||||
|         const text = [context.title || "", context.content || ""].filter(Boolean).join(" "); |  | ||||||
|         return this.generateEmbeddings(text); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple notes with their contexts |  | ||||||
|      */ |  | ||||||
|     async generateBatchNoteEmbeddings(contexts: NoteEmbeddingContext[]): Promise<Float32Array[]> { |  | ||||||
|         if (contexts.length === 0) { |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await this.processWithAdaptiveBatch( |  | ||||||
|                 contexts, |  | ||||||
|                 async (batch) => { |  | ||||||
|                     const batchResults = await Promise.all( |  | ||||||
|                         batch.map(context => this.generateNoteEmbeddings(context)) |  | ||||||
|                     ); |  | ||||||
|                     return batchResults; |  | ||||||
|                 }, |  | ||||||
|                 this.isBatchSizeError.bind(this) |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|         catch (error) { |  | ||||||
|             const errorMessage = (error as Error).message || "Unknown error"; |  | ||||||
|             log.error(`Batch note embedding error for provider ${this.name}: ${errorMessage}`); |  | ||||||
|             throw new Error(`${this.name} batch note embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Cleans and normalizes text for embeddings by removing excessive whitespace |  | ||||||
|      */ |  | ||||||
|     private cleanText(text: string): string { |  | ||||||
|         return text.replace(/\s+/g, ' ').trim(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generates a rich text representation of a note's context for embedding |  | ||||||
|      */ |  | ||||||
|     protected generateNoteContextText(context: NoteEmbeddingContext): string { |  | ||||||
|         // Build a relationship-focused summary first |  | ||||||
|         const relationshipSummary: string[] = []; |  | ||||||
|  |  | ||||||
|         // Summarize the note's place in the hierarchy |  | ||||||
|         if (context.parentTitles.length > 0) { |  | ||||||
|             relationshipSummary.push(`This note is a child of: ${context.parentTitles.map(t => this.cleanText(t)).join(', ')}.`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (context.childTitles.length > 0) { |  | ||||||
|             relationshipSummary.push(`This note has children: ${context.childTitles.map(t => this.cleanText(t)).join(', ')}.`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Emphasize relationships with other notes |  | ||||||
|         if (context.relatedNotes && context.relatedNotes.length > 0) { |  | ||||||
|             // Group by relation type for better understanding |  | ||||||
|             const relationsByType: Record<string, string[]> = {}; |  | ||||||
|             for (const rel of context.relatedNotes) { |  | ||||||
|                 if (!relationsByType[rel.relationName]) { |  | ||||||
|                     relationsByType[rel.relationName] = []; |  | ||||||
|                 } |  | ||||||
|                 relationsByType[rel.relationName].push(this.cleanText(rel.targetTitle)); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             for (const [relType, targets] of Object.entries(relationsByType)) { |  | ||||||
|                 relationshipSummary.push(`This note has ${relType} relationship with: ${targets.join(', ')}.`); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Emphasize backlinks for bidirectional relationships |  | ||||||
|         if (context.backlinks && context.backlinks.length > 0) { |  | ||||||
|             // Group by relation type |  | ||||||
|             const backlinksByType: Record<string, string[]> = {}; |  | ||||||
|             for (const link of context.backlinks) { |  | ||||||
|                 if (!backlinksByType[link.relationName]) { |  | ||||||
|                     backlinksByType[link.relationName] = []; |  | ||||||
|                 } |  | ||||||
|                 backlinksByType[link.relationName].push(this.cleanText(link.sourceTitle)); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             for (const [relType, sources] of Object.entries(backlinksByType)) { |  | ||||||
|                 relationshipSummary.push(`This note is ${relType} of: ${sources.join(', ')}.`); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Emphasize templates/inheritance |  | ||||||
|         if (context.templateTitles && context.templateTitles.length > 0) { |  | ||||||
|             relationshipSummary.push(`This note inherits from: ${context.templateTitles.map(t => this.cleanText(t)).join(', ')}.`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Start with core note information |  | ||||||
|         let result = |  | ||||||
|             `Title: ${this.cleanText(context.title)}\n` + |  | ||||||
|             `Type: ${context.type}\n` + |  | ||||||
|             `MIME: ${context.mime}\n`; |  | ||||||
|  |  | ||||||
|         // Add the relationship summary at the beginning for emphasis |  | ||||||
|         if (relationshipSummary.length > 0) { |  | ||||||
|             result += `Relationships: ${relationshipSummary.join(' ')}\n`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Continue with dates |  | ||||||
|         result += |  | ||||||
|             `Created: ${context.dateCreated}\n` + |  | ||||||
|             `Modified: ${context.dateModified}\n`; |  | ||||||
|  |  | ||||||
|         // Add attributes in a concise format |  | ||||||
|         if (context.attributes.length > 0) { |  | ||||||
|             result += 'Attributes: '; |  | ||||||
|             const attributeTexts = context.attributes.map(attr => |  | ||||||
|                 `${attr.type}:${attr.name}=${this.cleanText(attr.value)}` |  | ||||||
|             ); |  | ||||||
|             result += attributeTexts.join('; ') + '\n'; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add important label values concisely |  | ||||||
|         if (context.labelValues && Object.keys(context.labelValues).length > 0) { |  | ||||||
|             result += 'Labels: '; |  | ||||||
|             const labelTexts = Object.entries(context.labelValues).map(([name, value]) => |  | ||||||
|                 `${name}=${this.cleanText(value)}` |  | ||||||
|             ); |  | ||||||
|             result += labelTexts.join('; ') + '\n'; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Parents, children, templates, relations, and backlinks are now handled in the relationship summary |  | ||||||
|         // But we'll include them again in a more structured format for organization |  | ||||||
|  |  | ||||||
|         if (context.parentTitles.length > 0) { |  | ||||||
|             result += `Parents: ${context.parentTitles.map(t => this.cleanText(t)).join('; ')}\n`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (context.childTitles.length > 0) { |  | ||||||
|             result += `Children: ${context.childTitles.map(t => this.cleanText(t)).join('; ')}\n`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (context.templateTitles && context.templateTitles.length > 0) { |  | ||||||
|             result += `Templates: ${context.templateTitles.map(t => this.cleanText(t)).join('; ')}\n`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (context.relatedNotes && context.relatedNotes.length > 0) { |  | ||||||
|             result += 'Related: '; |  | ||||||
|             const relatedTexts = context.relatedNotes.map(rel => |  | ||||||
|                 `${rel.relationName}→${this.cleanText(rel.targetTitle)}` |  | ||||||
|             ); |  | ||||||
|             result += relatedTexts.join('; ') + '\n'; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (context.backlinks && context.backlinks.length > 0) { |  | ||||||
|             result += 'Referenced By: '; |  | ||||||
|             const backlinkTexts = context.backlinks.map(link => |  | ||||||
|                 `${this.cleanText(link.sourceTitle)}→${link.relationName}` |  | ||||||
|             ); |  | ||||||
|             result += backlinkTexts.join('; ') + '\n'; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add attachments concisely |  | ||||||
|         if (context.attachments.length > 0) { |  | ||||||
|             result += 'Attachments: '; |  | ||||||
|             const attachmentTexts = context.attachments.map(att => |  | ||||||
|                 `${this.cleanText(att.title)}(${att.mime})` |  | ||||||
|             ); |  | ||||||
|             result += attachmentTexts.join('; ') + '\n'; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add content (already cleaned in getNoteEmbeddingContext) |  | ||||||
|         result += `Content: ${context.content}`; |  | ||||||
|  |  | ||||||
|         return result; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Process a batch of items with automatic retries and batch size adjustment |  | ||||||
|      */ |  | ||||||
|     protected async processBatchWithRetries<T>( |  | ||||||
|         items: T[], |  | ||||||
|         processFn: (batch: T[]) => Promise<Float32Array[]>, |  | ||||||
|         isBatchSizeError: (error: unknown) => boolean = this.isBatchSizeError.bind(this) |  | ||||||
|     ): Promise<Float32Array[]> { |  | ||||||
|         const results: Float32Array[] = []; |  | ||||||
|         const failures: { index: number, error: string }[] = []; |  | ||||||
|         let currentBatchSize = await this.getBatchSize(); |  | ||||||
|         let lastError: Error | null = null; |  | ||||||
|  |  | ||||||
|         // Process items in batches |  | ||||||
|         for (let i = 0; i < items.length;) { |  | ||||||
|             const batch = items.slice(i, i + currentBatchSize); |  | ||||||
|  |  | ||||||
|             try { |  | ||||||
|                 // Process the current batch |  | ||||||
|                 const batchResults = await processFn(batch); |  | ||||||
|                 results.push(...batchResults); |  | ||||||
|                 i += batch.length; |  | ||||||
|             } |  | ||||||
|             catch (error) { |  | ||||||
|                 lastError = error as Error; |  | ||||||
|                 const errorMessage = lastError.message || 'Unknown error'; |  | ||||||
|  |  | ||||||
|                 // Check if this is a batch size related error |  | ||||||
|                 if (isBatchSizeError(error) && currentBatchSize > 1) { |  | ||||||
|                     // Reduce batch size and retry |  | ||||||
|                     const newBatchSize = Math.max(1, Math.floor(currentBatchSize / 2)); |  | ||||||
|                     console.warn(`Batch size error detected, reducing batch size from ${currentBatchSize} to ${newBatchSize}: ${errorMessage}`); |  | ||||||
|                     currentBatchSize = newBatchSize; |  | ||||||
|                 } |  | ||||||
|                 else if (currentBatchSize === 1) { |  | ||||||
|                     // If we're already at batch size 1, we can't reduce further, so log the error and skip this item |  | ||||||
|                     console.error(`Error processing item at index ${i} with batch size 1: ${errorMessage}`); |  | ||||||
|                     failures.push({ index: i, error: errorMessage }); |  | ||||||
|                     i++; // Move to the next item |  | ||||||
|                 } |  | ||||||
|                 else { |  | ||||||
|                     // For other errors, retry with a smaller batch size as a precaution |  | ||||||
|                     const newBatchSize = Math.max(1, Math.floor(currentBatchSize / 2)); |  | ||||||
|                     console.warn(`Error processing batch, reducing batch size from ${currentBatchSize} to ${newBatchSize} as a precaution: ${errorMessage}`); |  | ||||||
|                     currentBatchSize = newBatchSize; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If all items failed and we have a last error, throw it |  | ||||||
|         if (results.length === 0 && failures.length > 0 && lastError) { |  | ||||||
|             throw lastError; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If some items failed but others succeeded, log the summary |  | ||||||
|         if (failures.length > 0) { |  | ||||||
|             console.warn(`Processed ${results.length} items successfully, but ${failures.length} items failed`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,25 +0,0 @@ | |||||||
| import type { NoteEmbeddingContext } from "../types.js"; |  | ||||||
| import type { EmbeddingProvider } from "../embeddings_interface.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for chunking operations |  | ||||||
|  */ |  | ||||||
| export interface ChunkingOperations { |  | ||||||
|     /** |  | ||||||
|      * Process a large note by breaking it into chunks and creating embeddings for each chunk |  | ||||||
|      */ |  | ||||||
|     processNoteWithChunking( |  | ||||||
|         noteId: string, |  | ||||||
|         provider: EmbeddingProvider, |  | ||||||
|         context: NoteEmbeddingContext |  | ||||||
|     ): Promise<void>; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get the chunking operations instance |  | ||||||
|  * This function is implemented to break circular dependencies |  | ||||||
|  */ |  | ||||||
| export async function getChunkingOperations(): Promise<ChunkingOperations> { |  | ||||||
|     const chunking = await import('./chunking_processor.js'); |  | ||||||
|     return chunking; |  | ||||||
| } |  | ||||||
| @@ -1,477 +0,0 @@ | |||||||
| import log from "../../../log.js"; |  | ||||||
| import dateUtils from "../../../date_utils.js"; |  | ||||||
| import sql from "../../../sql.js"; |  | ||||||
| import becca from "../../../../becca/becca.js"; |  | ||||||
| import cls from "../../../../services/cls.js"; |  | ||||||
| import type { NoteEmbeddingContext } from "../types.js"; |  | ||||||
| import type { EmbeddingProvider } from "../embeddings_interface.js"; |  | ||||||
| import type { EmbeddingConfig } from "../embeddings_interface.js"; |  | ||||||
| import { LLM_CONSTANTS } from "../../../llm/constants/provider_constants.js"; |  | ||||||
| import { EMBEDDING_PROCESSING } from '../../constants/search_constants.js'; |  | ||||||
|  |  | ||||||
| // Define error categories for better handling |  | ||||||
| const ERROR_CATEGORIES = { |  | ||||||
|     // Temporary errors that should be retried |  | ||||||
|     TEMPORARY: { |  | ||||||
|         patterns: [ |  | ||||||
|             'timeout', 'connection', 'network', 'rate limit', 'try again', |  | ||||||
|             'service unavailable', 'too many requests', 'server error', |  | ||||||
|             'gateway', 'temporarily', 'overloaded' |  | ||||||
|         ] |  | ||||||
|     }, |  | ||||||
|     // Permanent errors that should not be retried |  | ||||||
|     PERMANENT: { |  | ||||||
|         patterns: [ |  | ||||||
|             'invalid request', 'invalid content', 'not found', 'unsupported model', |  | ||||||
|             'invalid model', 'content policy', 'forbidden', 'unauthorized', |  | ||||||
|             'token limit', 'context length', 'too long', 'content violation' |  | ||||||
|         ] |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Maximum time (in milliseconds) allowed for the entire chunking process |  | ||||||
| const MAX_TOTAL_PROCESSING_TIME = EMBEDDING_PROCESSING.MAX_TOTAL_PROCESSING_TIME; |  | ||||||
|  |  | ||||||
| // Maximum number of retry attempts per chunk |  | ||||||
| const MAX_CHUNK_RETRY_ATTEMPTS = EMBEDDING_PROCESSING.MAX_CHUNK_RETRY_ATTEMPTS; |  | ||||||
|  |  | ||||||
| // Maximum time per chunk processing (to prevent individual chunks from hanging) |  | ||||||
| const DEFAULT_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.DEFAULT_MAX_CHUNK_PROCESSING_TIME; |  | ||||||
| const OLLAMA_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.OLLAMA_MAX_CHUNK_PROCESSING_TIME; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for chunks from the chunking process |  | ||||||
|  */ |  | ||||||
| interface ContentChunk { |  | ||||||
|     content: string; |  | ||||||
|     index: number; |  | ||||||
|     metadata?: Record<string, unknown>; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Categorize an error as temporary or permanent based on its message |  | ||||||
|  * @param errorMessage - The error message to categorize |  | ||||||
|  * @returns 'temporary', 'permanent', or 'unknown' |  | ||||||
|  */ |  | ||||||
| function categorizeError(errorMessage: string): 'temporary' | 'permanent' | 'unknown' { |  | ||||||
|     const lowerCaseMessage = errorMessage.toLowerCase(); |  | ||||||
|  |  | ||||||
|     // Check for temporary error patterns |  | ||||||
|     for (const pattern of ERROR_CATEGORIES.TEMPORARY.patterns) { |  | ||||||
|         if (lowerCaseMessage.includes(pattern.toLowerCase())) { |  | ||||||
|             return 'temporary'; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Check for permanent error patterns |  | ||||||
|     for (const pattern of ERROR_CATEGORIES.PERMANENT.patterns) { |  | ||||||
|         if (lowerCaseMessage.includes(pattern.toLowerCase())) { |  | ||||||
|             return 'permanent'; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Default to unknown |  | ||||||
|     return 'unknown'; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Process a chunk with a timeout to prevent hanging |  | ||||||
|  * @param provider - The embedding provider |  | ||||||
|  * @param chunk - The chunk to process |  | ||||||
|  * @param timeoutMs - Timeout in milliseconds |  | ||||||
|  * @returns The generated embedding |  | ||||||
|  */ |  | ||||||
| async function processChunkWithTimeout( |  | ||||||
|     provider: EmbeddingProvider, |  | ||||||
|     chunk: { content: string }, |  | ||||||
|     timeoutMs: number |  | ||||||
| ): Promise<Float32Array> { |  | ||||||
|     // Create a promise that rejects after the timeout |  | ||||||
|     const timeoutPromise = new Promise<never>((_, reject) => { |  | ||||||
|         setTimeout(() => { |  | ||||||
|             reject(new Error(`Chunk processing timed out after ${timeoutMs}ms`)); |  | ||||||
|         }, timeoutMs); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     // Create the actual processing promise |  | ||||||
|     const processingPromise = provider.generateEmbeddings(chunk.content); |  | ||||||
|  |  | ||||||
|     // Race the two promises - whichever completes/rejects first wins |  | ||||||
|     return Promise.race([processingPromise, timeoutPromise]); |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Process a large note by breaking it into chunks and creating embeddings for each chunk
 * This provides more detailed and focused embeddings for different parts of large notes
 *
 * Side effects: existing embeddings for (noteId, provider, model) are deleted up
 * front, one embedding is stored per successfully processed chunk, and on heavy
 * failure the note's embedding_queue row is updated before an error is thrown.
 *
 * @param noteId - The ID of the note to process
 * @param provider - The embedding provider to use
 * @param context - The note context data
 * @throws Error if the note is not in the Becca cache, or if no chunks (or more
 *         than half of them) could be embedded
 */
export async function processNoteWithChunking(
    noteId: string,
    provider: EmbeddingProvider,
    context: NoteEmbeddingContext
): Promise<void> {
    // Track the overall start time
    const startTime = Date.now();

    try {
        // Get the context extractor dynamically to avoid circular dependencies
        const { ContextExtractor } = await import('../../context/index.js');
        const contextExtractor = new ContextExtractor();

        // Get note from becca
        const note = becca.notes[noteId];
        if (!note) {
            throw new Error(`Note ${noteId} not found in Becca cache`);
        }

        // Use semantic chunking for better boundaries
        const chunks = await contextExtractor.semanticChunking(
            context.content,
            note.title,
            noteId,
            {
                // Adjust chunk size based on provider using constants
                maxChunkSize: provider.name === 'ollama' ?
                    LLM_CONSTANTS.CHUNKING.OLLAMA_SIZE :
                    LLM_CONSTANTS.CHUNKING.DEFAULT_SIZE,
                respectBoundaries: true
            }
        );

        if (!chunks || chunks.length === 0) {
            // Fall back to single embedding if chunking fails
            // NOTE(review): cls.init presumably establishes the async-local context the
            // storage layer expects — verify against the cls module
            await cls.init(async () => {
                const embedding = await provider.generateEmbeddings(context.content);
                const config = provider.getConfig();

                // Use dynamic import instead of static import
                const storage = await import('../storage.js');
                await storage.storeNoteEmbedding(noteId, provider.name, config.model, embedding);
            });

            log.info(`Generated single embedding for note ${noteId} (${note.title}) since chunking failed`);
            return;
        }

        // Generate and store embeddings for each chunk
        const config = provider.getConfig();

        // Delete existing embeddings first to avoid duplicates
        // Use dynamic import
        const storage = await import('../storage.js');
        await storage.deleteNoteEmbeddings(noteId, provider.name, config.model);

        // Track successful and failed chunks in memory during this processing run
        let successfulChunks = 0;
        let failedChunks = 0;
        const totalChunks = chunks.length;
        const failedChunkDetails: {
            index: number,
            error: string,
            category: 'temporary' | 'permanent' | 'unknown',
            attempts: number
        }[] = [];
        const retryQueue: {
            index: number,
            chunk: any,
            attempts: number
        }[] = [];

        log.info(`Processing ${chunks.length} chunks for note ${noteId} (${note.title})`);

        // Get the current time to prevent duplicate processing from timeouts
        const processingStartTime = Date.now();
        const processingId = `${noteId}-${processingStartTime}`;
        log.info(`Starting processing run ${processingId}`);

        // Process each chunk with a delay based on provider to avoid rate limits
        for (let i = 0; i < chunks.length; i++) {
            // Check if we've exceeded the overall time limit
            if (Date.now() - startTime > MAX_TOTAL_PROCESSING_TIME) {
                log.info(`Exceeded maximum processing time (${MAX_TOTAL_PROCESSING_TIME}ms) for note ${noteId}, stopping after ${i} chunks`);

                // Mark remaining chunks as failed due to timeout
                for (let j = i; j < chunks.length; j++) {
                    failedChunks++;
                    failedChunkDetails.push({
                        index: j + 1,
                        error: "Processing timeout - exceeded total allowed time",
                        category: 'temporary',
                        attempts: 1
                    });
                }

                // Break the loop, we'll handle this as partial success if some chunks succeeded
                break;
            }

            const chunk = chunks[i];
            try {
                // Generate embedding for this chunk's content with a timeout
                await cls.init(async () => {
                    const embedding = await processChunkWithTimeout(
                        provider,
                        chunk,
                        provider.name === 'ollama' ? OLLAMA_MAX_CHUNK_PROCESSING_TIME : DEFAULT_MAX_CHUNK_PROCESSING_TIME
                    );

                    // Store with chunk information in a unique ID format
                    // NOTE(review): chunkIdSuffix is computed but never used — storeNoteEmbedding
                    // is called with the plain noteId, so per-chunk IDs are apparently lost; confirm
                    const chunkIdSuffix = `${i + 1}_of_${chunks.length}`;
                    await storage.storeNoteEmbedding(
                        noteId,
                        provider.name,
                        config.model,
                        embedding
                    );
                });

                successfulChunks++;

                // Small delay between chunks to avoid rate limits - longer for Ollama
                if (i < chunks.length - 1) {
                    await new Promise(resolve => setTimeout(resolve,
                        provider.name === 'ollama' ? 2000 : 100));
                }
            } catch (error: any) {
                const errorMessage = error.message || 'Unknown error';
                const errorCategory = categorizeError(errorMessage);

                // Track the failure for this specific chunk
                failedChunks++;
                failedChunkDetails.push({
                    index: i + 1,
                    error: errorMessage,
                    category: errorCategory,
                    attempts: 1
                });

                // Only add to retry queue if not a permanent error
                if (errorCategory !== 'permanent') {
                    retryQueue.push({
                        index: i,
                        chunk: chunk,
                        attempts: 1
                    });
                } else {
                    log.info(`Chunk ${i + 1} for note ${noteId} has permanent error, skipping retries: ${errorMessage}`);
                }

                log.error(`Error processing chunk ${i + 1} for note ${noteId} (${errorCategory} error): ${errorMessage}`);
            }
        }

        // Set a time limit for the retry phase
        const retryStartTime = Date.now();
        const MAX_RETRY_TIME = 2 * 60 * 1000; // 2 minutes for all retries

        // Retry failed chunks with exponential backoff, but only those that aren't permanent errors
        // NOTE(review): the second condition skips retries entirely when *every* chunk failed
        // (retryQueue.length === chunks.length) — confirm this is intentional
        if (retryQueue.length > 0 && retryQueue.length < chunks.length) {
            log.info(`Retrying ${retryQueue.length} failed chunks for note ${noteId}`);

            for (let j = 0; j < retryQueue.length; j++) {
                // Check if we've exceeded the retry time limit
                if (Date.now() - retryStartTime > MAX_RETRY_TIME) {
                    log.info(`Exceeded maximum retry time (${MAX_RETRY_TIME}ms) for note ${noteId}, stopping after ${j} retries`);
                    break;
                }

                const item = retryQueue[j];

                // Skip if we've already reached the max retry attempts for this chunk
                if (item.attempts >= MAX_CHUNK_RETRY_ATTEMPTS) {
                    log.info(`Skipping chunk ${item.index + 1} for note ${noteId} as it reached maximum retry attempts (${MAX_CHUNK_RETRY_ATTEMPTS})`);
                    continue;
                }

                try {
                    // Wait longer for retries with exponential backoff
                    // NOTE(review): the backoff grows with queue position j, not with the
                    // chunk's own attempt count — verify that is the intended behavior
                    await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(1.5, j)));

                    // Retry the embedding with timeout using cls.init
                    await cls.init(async () => {
                        const embedding = await processChunkWithTimeout(
                            provider,
                            item.chunk,
                            provider.name === 'ollama' ? OLLAMA_MAX_CHUNK_PROCESSING_TIME : DEFAULT_MAX_CHUNK_PROCESSING_TIME
                        );

                        // Store with unique ID that indicates it was a retry
                        // NOTE(review): as in the first pass, chunkIdSuffix is unused and the
                        // embedding is stored under the plain noteId
                        const chunkIdSuffix = `${item.index + 1}_of_${chunks.length}`;
                        const storage = await import('../storage.js');
                        await storage.storeNoteEmbedding(
                            noteId,
                            provider.name,
                            config.model,
                            embedding
                        );
                    });

                    // Update counters
                    successfulChunks++;
                    failedChunks--;

                    // Remove from failedChunkDetails
                    const detailIndex = failedChunkDetails.findIndex(d => d.index === item.index + 1);
                    if (detailIndex >= 0) {
                        failedChunkDetails.splice(detailIndex, 1);
                    }

                    log.info(`Successfully retried chunk ${item.index + 1} for note ${noteId} on attempt ${item.attempts + 1}`);
                } catch (error: any) {
                    const errorMessage = error.message || 'Unknown error';
                    const errorCategory = categorizeError(errorMessage);

                    // Update failure record with new attempt count
                    const detailIndex = failedChunkDetails.findIndex(d => d.index === item.index + 1);
                    if (detailIndex >= 0) {
                        failedChunkDetails[detailIndex].attempts++;
                        failedChunkDetails[detailIndex].error = errorMessage;
                        failedChunkDetails[detailIndex].category = errorCategory;
                    }

                    log.error(`Retry failed for chunk ${item.index + 1} of note ${noteId} (${errorCategory} error): ${errorMessage}`);

                    // For timeout errors, mark as permanent to avoid further retries
                    if (errorMessage.includes('timed out')) {
                        if (detailIndex >= 0) {
                            failedChunkDetails[detailIndex].category = 'permanent';
                        }
                        log.info(`Chunk ${item.index + 1} for note ${noteId} timed out, marking as permanent failure`);
                    }
                    // Add to retry queue again only if it's not a permanent error and hasn't reached the max attempts
                    else if (errorCategory !== 'permanent' && item.attempts + 1 < MAX_CHUNK_RETRY_ATTEMPTS) {
                        // If we're still below MAX_CHUNK_RETRY_ATTEMPTS, we'll try again in the next cycle
                        // NOTE(review): there is no "next cycle" — the retry queue is traversed exactly
                        // once, so this increment has no further effect
                        item.attempts++;
                    } else if (errorCategory === 'permanent') {
                        log.info(`Chunk ${item.index + 1} for note ${noteId} will not be retried further due to permanent error`);
                    } else {
                        log.info(`Chunk ${item.index + 1} for note ${noteId} reached maximum retry attempts (${MAX_CHUNK_RETRY_ATTEMPTS})`);
                    }
                }
            }
        }

        // Log information about the processed chunks
        if (successfulChunks > 0) {
            log.info(`[${processingId}] Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
        }

        if (failedChunks > 0) {
            // Count permanent vs temporary errors
            const permanentErrors = failedChunkDetails.filter(d => d.category === 'permanent').length;
            const temporaryErrors = failedChunkDetails.filter(d => d.category === 'temporary').length;
            const unknownErrors = failedChunkDetails.filter(d => d.category === 'unknown').length;

            log.info(`[${processingId}] Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title}). ` +
                    `Permanent: ${permanentErrors}, Temporary: ${temporaryErrors}, Unknown: ${unknownErrors}`);
        }

        // Calculate the failure ratio
        const failureRatio = failedChunks / totalChunks;

        // If no chunks were successfully processed, or if more than 50% failed, mark the entire note as failed
        if (successfulChunks === 0 || failureRatio > 0.5) {
            // Check if all failures are permanent
            const allPermanent = failedChunkDetails.every(d => d.category === 'permanent');
            const errorType = allPermanent ? 'permanent' : (failureRatio > 0.5 ? 'too_many_failures' : 'all_failed');

            // Mark this note as failed in the embedding_queue table with a permanent error status
            // NOTE(review): attempts = 999 appears to act as a "do not retry" sentinel —
            // verify against the embedding queue processor
            const now = dateUtils.utcNowDateTime();
            const errorSummary = `Note embedding failed: ${failedChunks}/${totalChunks} chunks failed (${errorType}). First error: ${failedChunkDetails[0]?.error}`;

            await sql.execute(`
                UPDATE embedding_queue
                SET error = ?, lastAttempt = ?, attempts = 999
                WHERE noteId = ?
            `, [errorSummary, now, noteId]);

            throw new Error(errorSummary);
        }

        // If some chunks failed but others succeeded, log a warning but consider the processing complete
        // The note will be removed from the queue, but we'll store error information
        if (failedChunks > 0 && successfulChunks > 0) {
            // Create detailed error summary
            const permanentErrors = failedChunkDetails.filter(d => d.category === 'permanent').length;
            const temporaryErrors = failedChunkDetails.filter(d => d.category === 'temporary').length;
            const unknownErrors = failedChunkDetails.filter(d => d.category === 'unknown').length;

            const errorSummary = `Note processed partially: ${successfulChunks}/${totalChunks} chunks succeeded, ` +
                               `${failedChunks}/${totalChunks} failed (${permanentErrors} permanent, ${temporaryErrors} temporary, ${unknownErrors} unknown)`;
            log.info(errorSummary);

            // Store a summary in the error field of embedding_queue
            // This is just for informational purposes - the note will be removed from the queue
            const now = dateUtils.utcNowDateTime();
            await sql.execute(`
                UPDATE embedding_queue
                SET error = ?, lastAttempt = ?
                WHERE noteId = ?
            `, [errorSummary, now, noteId]);
        }

        // Track total processing time
        const totalTime = Date.now() - startTime;
        log.info(`[${processingId}] Total processing time for note ${noteId}: ${totalTime}ms`);

    } catch (error: any) {
        log.error(`Error in chunked embedding process for note ${noteId}: ${error.message || 'Unknown error'}`);
        throw error;
    }
}
|  |  | ||||||
| /** |  | ||||||
|  * Process a chunk with retry logic to handle errors |  | ||||||
|  * @param index - The chunk index for tracking |  | ||||||
|  * @param chunk - The content chunk |  | ||||||
|  * @param provider - The embedding provider |  | ||||||
|  * @param noteId - ID of the note being processed |  | ||||||
|  * @param config - Embedding configuration |  | ||||||
|  * @param startTime - When the overall process started |  | ||||||
|  * @param storage - The storage module |  | ||||||
|  * @param maxTimePerChunk - Max time per chunk processing |  | ||||||
|  * @param retryAttempt - Current retry attempt number |  | ||||||
|  */ |  | ||||||
| async function processChunkWithRetry( |  | ||||||
|     index: number, |  | ||||||
|     chunk: ContentChunk, |  | ||||||
|     provider: EmbeddingProvider, |  | ||||||
|     noteId: string, |  | ||||||
|     config: EmbeddingConfig, |  | ||||||
|     startTime: number, |  | ||||||
|     storage: typeof import('../storage.js'), |  | ||||||
|     maxTimePerChunk: number, |  | ||||||
|     retryAttempt = 0 |  | ||||||
| ): Promise<boolean> { |  | ||||||
|     try { |  | ||||||
|         // Try to generate embedding with timeout |  | ||||||
|         const embedding = await processChunkWithTimeout(provider, chunk, maxTimePerChunk); |  | ||||||
|  |  | ||||||
|         // Store the embedding with the chunk ID |  | ||||||
|         const chunkId = `${noteId}_chunk${index}`; |  | ||||||
|         await storage.storeNoteEmbedding(chunkId, provider.name, config.model, embedding); |  | ||||||
|  |  | ||||||
|         return true; |  | ||||||
|     } catch (error: unknown) { |  | ||||||
|         const errorMessage = error instanceof Error ? error.message : String(error); |  | ||||||
|         const category = categorizeError(errorMessage); |  | ||||||
|  |  | ||||||
|         // If we haven't exceeded the retry limit and it's a temporary error, retry |  | ||||||
|         if (retryAttempt < MAX_CHUNK_RETRY_ATTEMPTS && (category === 'temporary' || category === 'unknown')) { |  | ||||||
|             // Exponential backoff |  | ||||||
|             const delayMs = Math.min(1000 * Math.pow(2, retryAttempt), 15000); |  | ||||||
|             log.info(`Retrying chunk ${index} after ${delayMs}ms (attempt ${retryAttempt + 1}/${MAX_CHUNK_RETRY_ATTEMPTS})`); |  | ||||||
|             await new Promise(resolve => setTimeout(resolve, delayMs)); |  | ||||||
|  |  | ||||||
|             return processChunkWithRetry( |  | ||||||
|                 index, chunk, provider, noteId, config, startTime, storage, maxTimePerChunk, retryAttempt + 1 |  | ||||||
|             ); |  | ||||||
|         } else { |  | ||||||
|             log.error(`Failed to process chunk ${index} after ${retryAttempt + 1} attempts: ${errorMessage}`); |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,327 +0,0 @@ | |||||||
| import becca from "../../../becca/becca.js"; |  | ||||||
| import type { NoteEmbeddingContext } from "./types.js"; |  | ||||||
| import sanitizeHtml from "sanitize-html"; |  | ||||||
| import type BNote from "../../../becca/entities/bnote.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Clean note content by removing HTML tags and normalizing whitespace |  | ||||||
|  */ |  | ||||||
| export async function cleanNoteContent(content: string, type: string, mime: string): Promise<string> { |  | ||||||
|     if (!content) return ''; |  | ||||||
|  |  | ||||||
|     // If it's HTML content, remove HTML tags |  | ||||||
|     if ((type === 'text' && mime === 'text/html') || content.includes('<div>') || content.includes('<p>')) { |  | ||||||
|         // Use sanitizeHtml to remove all HTML tags |  | ||||||
|         content = sanitizeHtml(content, { |  | ||||||
|             allowedTags: [], |  | ||||||
|             allowedAttributes: {}, |  | ||||||
|             textFilter: (text) => { |  | ||||||
|                 // Normalize the text, removing excessive whitespace |  | ||||||
|                 return text.replace(/\s+/g, ' '); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Additional cleanup for any remaining HTML entities |  | ||||||
|     content = content |  | ||||||
|         .replace(/ /g, ' ') |  | ||||||
|         .replace(/</g, '<') |  | ||||||
|         .replace(/>/g, '>') |  | ||||||
|         .replace(/"/g, '"') |  | ||||||
|         .replace(/'/g, "'") |  | ||||||
|         .replace(/&/g, '&'); |  | ||||||
|  |  | ||||||
|     // Normalize whitespace (replace multiple spaces/newlines with single space) |  | ||||||
|     content = content.replace(/\s+/g, ' '); |  | ||||||
|  |  | ||||||
|     // Trim the content |  | ||||||
|     content = content.trim(); |  | ||||||
|  |  | ||||||
|     // Import constants directly |  | ||||||
|     const { LLM_CONSTANTS } = await import('../constants/provider_constants.js'); |  | ||||||
|     // Truncate if extremely long |  | ||||||
|     if (content.length > LLM_CONSTANTS.CONTENT.MAX_TOTAL_CONTENT_LENGTH) { |  | ||||||
|         content = content.substring(0, LLM_CONSTANTS.CONTENT.MAX_TOTAL_CONTENT_LENGTH) + ' [content truncated]'; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return content; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Extract content from different note types |  | ||||||
|  */ |  | ||||||
| export function extractStructuredContent(content: string, type: string, mime: string): string { |  | ||||||
|     try { |  | ||||||
|         if (!content) return ''; |  | ||||||
|  |  | ||||||
|         // Special handling based on note type |  | ||||||
|         switch (type) { |  | ||||||
|             case 'mindMap': |  | ||||||
|             case 'relationMap': |  | ||||||
|             case 'canvas': |  | ||||||
|                 if (mime === 'application/json') { |  | ||||||
|                     const jsonContent = JSON.parse(content); |  | ||||||
|  |  | ||||||
|                     if (type === 'canvas') { |  | ||||||
|                         // Extract text elements from canvas |  | ||||||
|                         if (jsonContent.elements && Array.isArray(jsonContent.elements)) { |  | ||||||
|                             const texts = jsonContent.elements |  | ||||||
|                                 .filter((element: any) => element.type === 'text' && element.text) |  | ||||||
|                                 .map((element: any) => element.text); |  | ||||||
|                             return texts.join('\n'); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                     else if (type === 'mindMap') { |  | ||||||
|                         // Extract node text from mind map |  | ||||||
|                         const extractMindMapNodes = (node: any): string[] => { |  | ||||||
|                             let texts: string[] = []; |  | ||||||
|                             if (node.text) { |  | ||||||
|                                 texts.push(node.text); |  | ||||||
|                             } |  | ||||||
|                             if (node.children && Array.isArray(node.children)) { |  | ||||||
|                                 for (const child of node.children) { |  | ||||||
|                                     texts = texts.concat(extractMindMapNodes(child)); |  | ||||||
|                                 } |  | ||||||
|                             } |  | ||||||
|                             return texts; |  | ||||||
|                         }; |  | ||||||
|  |  | ||||||
|                         if (jsonContent.root) { |  | ||||||
|                             return extractMindMapNodes(jsonContent.root).join('\n'); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                     else if (type === 'relationMap') { |  | ||||||
|                         // Extract relation map entities and connections |  | ||||||
|                         let result = ''; |  | ||||||
|  |  | ||||||
|                         if (jsonContent.notes && Array.isArray(jsonContent.notes)) { |  | ||||||
|                             result += 'Notes: ' + jsonContent.notes |  | ||||||
|                                 .map((note: any) => note.title || note.name) |  | ||||||
|                                 .filter(Boolean) |  | ||||||
|                                 .join(', ') + '\n'; |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         if (jsonContent.relations && Array.isArray(jsonContent.relations)) { |  | ||||||
|                             result += 'Relations: ' + jsonContent.relations |  | ||||||
|                                 .map((rel: any) => { |  | ||||||
|                                     const sourceNote = jsonContent.notes.find((n: any) => n.noteId === rel.sourceNoteId); |  | ||||||
|                                     const targetNote = jsonContent.notes.find((n: any) => n.noteId === rel.targetNoteId); |  | ||||||
|                                     const source = sourceNote ? (sourceNote.title || sourceNote.name) : 'unknown'; |  | ||||||
|                                     const target = targetNote ? (targetNote.title || targetNote.name) : 'unknown'; |  | ||||||
|                                     return `${source} → ${rel.name || ''} → ${target}`; |  | ||||||
|                                 }) |  | ||||||
|                                 .join('; '); |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         return result; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 return JSON.stringify(content); |  | ||||||
|  |  | ||||||
|             case 'mermaid': |  | ||||||
|                 // Return mermaid diagrams as-is (they're human-readable) |  | ||||||
|                 return content; |  | ||||||
|  |  | ||||||
|             case 'geoMap': |  | ||||||
|                 if (mime === 'application/json') { |  | ||||||
|                     const jsonContent = JSON.parse(content); |  | ||||||
|                     let result = ''; |  | ||||||
|  |  | ||||||
|                     if (jsonContent.markers && Array.isArray(jsonContent.markers)) { |  | ||||||
|                         result += jsonContent.markers |  | ||||||
|                             .map((marker: any) => { |  | ||||||
|                                 return `Location: ${marker.title || ''} (${marker.lat}, ${marker.lng})${marker.description ? ' - ' + marker.description : ''}`; |  | ||||||
|                             }) |  | ||||||
|                             .join('\n'); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     return result || JSON.stringify(content); |  | ||||||
|                 } |  | ||||||
|                 return JSON.stringify(content); |  | ||||||
|  |  | ||||||
|             case 'file': |  | ||||||
|             case 'image': |  | ||||||
|                 // For files and images, just return a placeholder |  | ||||||
|                 return `[${type} attachment]`; |  | ||||||
|  |  | ||||||
|             default: |  | ||||||
|                 return content; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     catch (error) { |  | ||||||
|         console.error(`Error extracting content from ${type} note:`, error); |  | ||||||
|         return content; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Gets context for a note to be embedded |  | ||||||
|  */ |  | ||||||
| export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbeddingContext> { |  | ||||||
|     const note = becca.getNote(noteId); |  | ||||||
|  |  | ||||||
|     if (!note) { |  | ||||||
|         throw new Error(`Note ${noteId} not found`); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Get parent note titles |  | ||||||
|     const parentNotes = note.getParentNotes(); |  | ||||||
|     const parentTitles = parentNotes.map(note => note.title); |  | ||||||
|  |  | ||||||
|     // Get child note titles |  | ||||||
|     const childNotes = note.getChildNotes(); |  | ||||||
|     const childTitles = childNotes.map(note => note.title); |  | ||||||
|  |  | ||||||
|     // Get all attributes (not just owned ones) |  | ||||||
|     const attributes = note.getAttributes().map(attr => ({ |  | ||||||
|         type: attr.type, |  | ||||||
|         name: attr.name, |  | ||||||
|         value: attr.value |  | ||||||
|     })); |  | ||||||
|  |  | ||||||
|     // Get backlinks (notes that reference this note through relations) |  | ||||||
|     const targetRelations = note.getTargetRelations(); |  | ||||||
|     const backlinks = targetRelations |  | ||||||
|         .map(relation => { |  | ||||||
|             const sourceNote = relation.getNote(); |  | ||||||
|             if (sourceNote && sourceNote.type !== 'search') { // Filter out search notes |  | ||||||
|                 return { |  | ||||||
|                     sourceNoteId: sourceNote.noteId, |  | ||||||
|                     sourceTitle: sourceNote.title, |  | ||||||
|                     relationName: relation.name |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
|             return null; |  | ||||||
|         }) |  | ||||||
|         .filter((item): item is { sourceNoteId: string; sourceTitle: string; relationName: string } => item !== null); |  | ||||||
|  |  | ||||||
|     // Get related notes through relations |  | ||||||
|     const relations = note.getRelations(); |  | ||||||
|     const relatedNotes = relations |  | ||||||
|         .map(relation => { |  | ||||||
|             const targetNote = relation.targetNote; |  | ||||||
|             if (targetNote) { |  | ||||||
|                 return { |  | ||||||
|                     targetNoteId: targetNote.noteId, |  | ||||||
|                     targetTitle: targetNote.title, |  | ||||||
|                     relationName: relation.name |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
|             return null; |  | ||||||
|         }) |  | ||||||
|         .filter((item): item is { targetNoteId: string; targetTitle: string; relationName: string } => item !== null); |  | ||||||
|  |  | ||||||
|     // Extract important labels that might affect semantics |  | ||||||
|     const labelValues: Record<string, string> = {}; |  | ||||||
|     const labels = note.getLabels(); |  | ||||||
|     for (const label of labels) { |  | ||||||
|         // Skip CSS and UI-related labels that don't affect semantics |  | ||||||
|         if (!label.name.startsWith('css') && |  | ||||||
|             !label.name.startsWith('workspace') && |  | ||||||
|             !label.name.startsWith('hide') && |  | ||||||
|             !label.name.startsWith('collapsed')) { |  | ||||||
|             labelValues[label.name] = label.value; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Get attachments |  | ||||||
|     const attachments = note.getAttachments().map(att => ({ |  | ||||||
|         title: att.title, |  | ||||||
|         mime: att.mime |  | ||||||
|     })); |  | ||||||
|  |  | ||||||
|     // Get content |  | ||||||
|     let content = ""; |  | ||||||
|  |  | ||||||
|     try { |  | ||||||
|         // Use the enhanced context extractor for improved content extraction |  | ||||||
|         // We're using a dynamic import to avoid circular dependencies |  | ||||||
|         const { ContextExtractor } = await import('../../llm/context/index.js'); |  | ||||||
|         const contextExtractor = new ContextExtractor(); |  | ||||||
|  |  | ||||||
|         // Get the content using the enhanced formatNoteContent method in context extractor |  | ||||||
|         const noteContent = await contextExtractor.getNoteContent(noteId); |  | ||||||
|  |  | ||||||
|         if (noteContent) { |  | ||||||
|             content = noteContent; |  | ||||||
|  |  | ||||||
|             // For large content, consider chunking or summarization |  | ||||||
|             if (content.length > 10000) { |  | ||||||
|                 // Large content handling options: |  | ||||||
|  |  | ||||||
|                 // Option 1: Use our summarization feature |  | ||||||
|                 const summary = await contextExtractor.getNoteSummary(noteId); |  | ||||||
|                 if (summary) { |  | ||||||
|                     content = summary; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Option 2: Alternative approach - use the first chunk if summarization fails |  | ||||||
|                 if (content.length > 10000) { |  | ||||||
|                     const chunks = await contextExtractor.getChunkedNoteContent(noteId); |  | ||||||
|                     if (chunks && chunks.length > 0) { |  | ||||||
|                         // Use the first chunk (most relevant/beginning) |  | ||||||
|                         content = chunks[0]; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             // Fallback to original method if context extractor fails |  | ||||||
|             const rawContent = String(await note.getContent() || ""); |  | ||||||
|  |  | ||||||
|             // Process the content based on note type to extract meaningful text |  | ||||||
|             if (note.type === 'text' || note.type === 'code') { |  | ||||||
|                 content = rawContent; |  | ||||||
|             } else if (['canvas', 'mindMap', 'relationMap', 'mermaid', 'geoMap'].includes(note.type)) { |  | ||||||
|                 // Process structured content types |  | ||||||
|                 content = extractStructuredContent(rawContent, note.type, note.mime); |  | ||||||
|             } else if (note.type === 'image' || note.type === 'file') { |  | ||||||
|                 content = `[${note.type} attachment: ${note.mime}]`; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Clean the content to remove HTML tags and normalize whitespace |  | ||||||
|             content = await cleanNoteContent(content, note.type, note.mime); |  | ||||||
|         } |  | ||||||
|     } catch (err) { |  | ||||||
|         console.error(`Error getting content for note ${noteId}:`, err); |  | ||||||
|         content = `[Error extracting content]`; |  | ||||||
|  |  | ||||||
|         // Try fallback to original method |  | ||||||
|         try { |  | ||||||
|             const rawContent = String(await note.getContent() || ""); |  | ||||||
|             if (note.type === 'text' || note.type === 'code') { |  | ||||||
|                 content = rawContent; |  | ||||||
|             } else if (['canvas', 'mindMap', 'relationMap', 'mermaid', 'geoMap'].includes(note.type)) { |  | ||||||
|                 content = extractStructuredContent(rawContent, note.type, note.mime); |  | ||||||
|             } |  | ||||||
|             content = await cleanNoteContent(content, note.type, note.mime); |  | ||||||
|         } catch (fallbackErr) { |  | ||||||
|             console.error(`Fallback content extraction also failed for note ${noteId}:`, fallbackErr); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Get template/inheritance relationships |  | ||||||
|     // This is from FNote.getNotesToInheritAttributesFrom - recreating similar logic for BNote |  | ||||||
|     const templateRelations = note.getRelations('template').concat(note.getRelations('inherit')); |  | ||||||
|     const templateTitles = templateRelations |  | ||||||
|         .map(rel => rel.targetNote) |  | ||||||
|         .filter((note): note is BNote => note !== undefined) |  | ||||||
|         .map(templateNote => templateNote.title); |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         noteId: note.noteId, |  | ||||||
|         title: note.title, |  | ||||||
|         content: content, |  | ||||||
|         type: note.type, |  | ||||||
|         mime: note.mime, |  | ||||||
|         dateCreated: note.dateCreated || "", |  | ||||||
|         dateModified: note.dateModified || "", |  | ||||||
|         attributes, |  | ||||||
|         parentTitles, |  | ||||||
|         childTitles, |  | ||||||
|         attachments, |  | ||||||
|         backlinks, |  | ||||||
|         relatedNotes, |  | ||||||
|         labelValues, |  | ||||||
|         templateTitles |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
| @@ -1,140 +0,0 @@ | |||||||
| import type { NoteType, AttributeType } from "@triliumnext/commons"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Represents the context of a note that will be embedded |  | ||||||
|  */ |  | ||||||
| export interface NoteEmbeddingContext { |  | ||||||
|     noteId: string; |  | ||||||
|     title: string; |  | ||||||
|     content: string; |  | ||||||
|     type: NoteType; |  | ||||||
|     mime: string; |  | ||||||
|     dateCreated: string; |  | ||||||
|     dateModified: string; |  | ||||||
|     attributes: { |  | ||||||
|         type: AttributeType; |  | ||||||
|         name: string; |  | ||||||
|         value: string; |  | ||||||
|     }[]; |  | ||||||
|     parentTitles: string[]; |  | ||||||
|     childTitles: string[]; |  | ||||||
|     attachments: { |  | ||||||
|         title: string; |  | ||||||
|         mime: string; |  | ||||||
|     }[]; |  | ||||||
|     backlinks?: Backlink[]; |  | ||||||
|     relatedNotes?: RelatedNote[]; |  | ||||||
|     labelValues?: Record<string, string>; |  | ||||||
|     templateTitles?: string[]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export interface Backlink { |  | ||||||
|     sourceNoteId: string; |  | ||||||
|     sourceTitle: string; |  | ||||||
|     relationName: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export interface RelatedNote { |  | ||||||
|     targetNoteId: string; |  | ||||||
|     targetTitle: string; |  | ||||||
|     relationName: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Information about an embedding model's capabilities |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingModelInfo { |  | ||||||
|     dimension: number; |  | ||||||
|     contextWindow: number; |  | ||||||
|     /** |  | ||||||
|      * Whether the model guarantees normalized vectors (unit length) |  | ||||||
|      */ |  | ||||||
|     guaranteesNormalization: boolean; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Normalization status of a provider's embeddings |  | ||||||
|  */ |  | ||||||
| export enum NormalizationStatus { |  | ||||||
|     /** |  | ||||||
|      * Provider guarantees all embeddings are normalized to unit vectors |  | ||||||
|      */ |  | ||||||
|     GUARANTEED = 'guaranteed', |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Provider does not guarantee normalization, but embeddings are usually normalized |  | ||||||
|      */ |  | ||||||
|     USUALLY = 'usually', |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Provider does not guarantee normalization, embeddings must be normalized before use |  | ||||||
|      */ |  | ||||||
|     NEVER = 'never', |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Normalization status is unknown and should be checked at runtime |  | ||||||
|      */ |  | ||||||
|     UNKNOWN = 'unknown' |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Configuration for how embeddings should be generated |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingConfig { |  | ||||||
|     model: string; |  | ||||||
|     dimension: number; |  | ||||||
|     type: 'float32' | 'float64'; |  | ||||||
|     /** |  | ||||||
|      * Whether embeddings should be normalized before use |  | ||||||
|      * If true, normalization will always be applied |  | ||||||
|      * If false, normalization depends on provider's status |  | ||||||
|      */ |  | ||||||
|     normalize?: boolean; |  | ||||||
|     /** |  | ||||||
|      * The normalization status of this provider |  | ||||||
|      */ |  | ||||||
|     normalizationStatus?: NormalizationStatus; |  | ||||||
|     batchSize?: number; |  | ||||||
|     contextWindowSize?: number; |  | ||||||
|     apiKey?: string; |  | ||||||
|     baseUrl?: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Core interface that all embedding providers must implement |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingProvider { |  | ||||||
|     name: string; |  | ||||||
|     getConfig(): EmbeddingConfig; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Returns information about the normalization status of this provider |  | ||||||
|      */ |  | ||||||
|     getNormalizationStatus(): NormalizationStatus; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Verify that embeddings are properly normalized |  | ||||||
|      * @returns true if embeddings are properly normalized |  | ||||||
|      */ |  | ||||||
|     verifyNormalization?(sample?: Float32Array): Promise<boolean>; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a single piece of text |  | ||||||
|      */ |  | ||||||
|     generateEmbeddings(text: string): Promise<Float32Array>; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple pieces of text in batch |  | ||||||
|      */ |  | ||||||
|     generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]>; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a note with its full context |  | ||||||
|      */ |  | ||||||
|     generateNoteEmbeddings(context: NoteEmbeddingContext): Promise<Float32Array>; |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple notes with their contexts in batch |  | ||||||
|      */ |  | ||||||
|     generateBatchNoteEmbeddings(contexts: NoteEmbeddingContext[]): Promise<Float32Array[]>; |  | ||||||
| } |  | ||||||
| @@ -1,112 +0,0 @@ | |||||||
| import sql from "../../../services/sql.js"; |  | ||||||
| import log from "../../../services/log.js"; |  | ||||||
| import options from "../../../services/options.js"; |  | ||||||
| import cls from "../../../services/cls.js"; |  | ||||||
| import { processEmbeddingQueue, queueNoteForEmbedding } from "./queue.js"; |  | ||||||
| import eventService from "../../../services/events.js"; |  | ||||||
| import becca from "../../../becca/becca.js"; |  | ||||||
|  |  | ||||||
| // Add mutex to prevent concurrent processing |  | ||||||
| let isProcessingEmbeddings = false; |  | ||||||
|  |  | ||||||
| // Store interval reference for cleanup |  | ||||||
| let backgroundProcessingInterval: NodeJS.Timeout | null = null; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Setup event listeners for embedding-related events |  | ||||||
|  */ |  | ||||||
| export function setupEmbeddingEventListeners() { |  | ||||||
|     // Listen for note content changes |  | ||||||
|     eventService.subscribe(eventService.NOTE_CONTENT_CHANGE, ({ entity }) => { |  | ||||||
|         if (entity && entity.noteId) { |  | ||||||
|             queueNoteForEmbedding(entity.noteId); |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     // Listen for new notes |  | ||||||
|     eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) => { |  | ||||||
|         if (entityName === "notes" && entity && entity.noteId) { |  | ||||||
|             queueNoteForEmbedding(entity.noteId); |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     // Listen for note title changes |  | ||||||
|     eventService.subscribe(eventService.NOTE_TITLE_CHANGED, ({ noteId }) => { |  | ||||||
|         if (noteId) { |  | ||||||
|             queueNoteForEmbedding(noteId); |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     // Listen for note deletions |  | ||||||
|     eventService.subscribe(eventService.ENTITY_DELETED, ({ entityName, entityId }) => { |  | ||||||
|         if (entityName === "notes" && entityId) { |  | ||||||
|             queueNoteForEmbedding(entityId, 'DELETE'); |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     // Listen for attribute changes that might affect context |  | ||||||
|     eventService.subscribe(eventService.ENTITY_CHANGED, ({ entityName, entity }) => { |  | ||||||
|         if (entityName === "attributes" && entity && entity.noteId) { |  | ||||||
|             queueNoteForEmbedding(entity.noteId); |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Setup background processing of the embedding queue |  | ||||||
|  */ |  | ||||||
| export async function setupEmbeddingBackgroundProcessing() { |  | ||||||
|     // Clear any existing interval |  | ||||||
|     if (backgroundProcessingInterval) { |  | ||||||
|         clearInterval(backgroundProcessingInterval); |  | ||||||
|         backgroundProcessingInterval = null; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const interval = parseInt(await options.getOption('embeddingUpdateInterval') || '200', 10); |  | ||||||
|  |  | ||||||
|     backgroundProcessingInterval = setInterval(async () => { |  | ||||||
|         try { |  | ||||||
|             // Skip if already processing |  | ||||||
|             if (isProcessingEmbeddings) { |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Set mutex |  | ||||||
|             isProcessingEmbeddings = true; |  | ||||||
|  |  | ||||||
|             // Wrap in cls.init to ensure proper context |  | ||||||
|             cls.init(async () => { |  | ||||||
|                 await processEmbeddingQueue(); |  | ||||||
|             }); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`); |  | ||||||
|         } finally { |  | ||||||
|             // Always release the mutex |  | ||||||
|             isProcessingEmbeddings = false; |  | ||||||
|         } |  | ||||||
|     }, interval); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Stop background processing of the embedding queue |  | ||||||
|  */ |  | ||||||
| export function stopEmbeddingBackgroundProcessing() { |  | ||||||
|     if (backgroundProcessingInterval) { |  | ||||||
|         clearInterval(backgroundProcessingInterval); |  | ||||||
|         backgroundProcessingInterval = null; |  | ||||||
|         log.info("Embedding background processing stopped"); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Initialize embeddings system |  | ||||||
|  */ |  | ||||||
| export async function initEmbeddings() { |  | ||||||
|     if (await options.getOptionBool('aiEnabled')) { |  | ||||||
|         setupEmbeddingEventListeners(); |  | ||||||
|         await setupEmbeddingBackgroundProcessing(); |  | ||||||
|         log.info("Embeddings system initialized"); |  | ||||||
|     } else { |  | ||||||
|         log.info("Embeddings system disabled"); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,112 +0,0 @@ | |||||||
| // Re-export all modules for easy access |  | ||||||
| import * as vectorUtils from './vector_utils.js'; |  | ||||||
| import * as storage from './storage.js'; |  | ||||||
| import * as contentProcessing from './content_processing.js'; |  | ||||||
| import * as queue from './queue.js'; |  | ||||||
| // Import chunking dynamically to prevent circular dependencies |  | ||||||
| // import * as chunking from './chunking.js'; |  | ||||||
| import * as events from './events.js'; |  | ||||||
| import * as stats from './stats.js'; |  | ||||||
| import * as indexOperations from './index_operations.js'; |  | ||||||
| import { getChunkingOperations } from './chunking/chunking_interface.js'; |  | ||||||
| import type { NoteEmbeddingContext } from './types.js'; |  | ||||||
|  |  | ||||||
| // Export types |  | ||||||
| export * from './types.js'; |  | ||||||
|  |  | ||||||
| // Maintain backward compatibility by exposing all functions at the top level |  | ||||||
| export const { |  | ||||||
|     cosineSimilarity, |  | ||||||
|     embeddingToBuffer, |  | ||||||
|     bufferToEmbedding, |  | ||||||
|     adaptEmbeddingDimensions, |  | ||||||
|     enhancedCosineSimilarity, |  | ||||||
|     selectOptimalEmbedding |  | ||||||
| } = vectorUtils; |  | ||||||
|  |  | ||||||
| export const { |  | ||||||
|     storeNoteEmbedding, |  | ||||||
|     getEmbeddingForNote, |  | ||||||
|     findSimilarNotes, |  | ||||||
|     deleteNoteEmbeddings |  | ||||||
| } = storage; |  | ||||||
|  |  | ||||||
| export const { |  | ||||||
|     getNoteEmbeddingContext, |  | ||||||
|     cleanNoteContent, |  | ||||||
|     extractStructuredContent |  | ||||||
| } = contentProcessing; |  | ||||||
|  |  | ||||||
| export const { |  | ||||||
|     queueNoteForEmbedding, |  | ||||||
|     getFailedEmbeddingNotes, |  | ||||||
|     retryFailedEmbedding, |  | ||||||
|     retryAllFailedEmbeddings, |  | ||||||
|     processEmbeddingQueue |  | ||||||
| } = queue; |  | ||||||
|  |  | ||||||
| // Export chunking function using the interface to break circular dependencies |  | ||||||
| export const processNoteWithChunking = async ( |  | ||||||
|     noteId: string, |  | ||||||
|     provider: any, |  | ||||||
|     context: NoteEmbeddingContext |  | ||||||
| ): Promise<void> => { |  | ||||||
|     const chunkingOps = await getChunkingOperations(); |  | ||||||
|     return chunkingOps.processNoteWithChunking(noteId, provider, context); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| export const { |  | ||||||
|     setupEmbeddingEventListeners, |  | ||||||
|     setupEmbeddingBackgroundProcessing, |  | ||||||
|     stopEmbeddingBackgroundProcessing, |  | ||||||
|     initEmbeddings |  | ||||||
| } = events; |  | ||||||
|  |  | ||||||
| export const { |  | ||||||
|     getEmbeddingStats, |  | ||||||
|     cleanupEmbeddings |  | ||||||
| } = stats; |  | ||||||
|  |  | ||||||
| export const { |  | ||||||
|     rebuildSearchIndex |  | ||||||
| } = indexOperations; |  | ||||||
|  |  | ||||||
| // Default export for backward compatibility |  | ||||||
| export default { |  | ||||||
|     // Vector utils |  | ||||||
|     cosineSimilarity: vectorUtils.cosineSimilarity, |  | ||||||
|     embeddingToBuffer: vectorUtils.embeddingToBuffer, |  | ||||||
|     bufferToEmbedding: vectorUtils.bufferToEmbedding, |  | ||||||
|  |  | ||||||
|     // Storage |  | ||||||
|     storeNoteEmbedding: storage.storeNoteEmbedding, |  | ||||||
|     getEmbeddingForNote: storage.getEmbeddingForNote, |  | ||||||
|     findSimilarNotes: storage.findSimilarNotes, |  | ||||||
|     deleteNoteEmbeddings: storage.deleteNoteEmbeddings, |  | ||||||
|  |  | ||||||
|     // Content processing |  | ||||||
|     getNoteEmbeddingContext: contentProcessing.getNoteEmbeddingContext, |  | ||||||
|  |  | ||||||
|     // Queue management |  | ||||||
|     queueNoteForEmbedding: queue.queueNoteForEmbedding, |  | ||||||
|     processEmbeddingQueue: queue.processEmbeddingQueue, |  | ||||||
|     getFailedEmbeddingNotes: queue.getFailedEmbeddingNotes, |  | ||||||
|     retryFailedEmbedding: queue.retryFailedEmbedding, |  | ||||||
|     retryAllFailedEmbeddings: queue.retryAllFailedEmbeddings, |  | ||||||
|  |  | ||||||
|     // Chunking - use the dynamic wrapper |  | ||||||
|     processNoteWithChunking, |  | ||||||
|  |  | ||||||
|     // Event handling |  | ||||||
|     setupEmbeddingEventListeners: events.setupEmbeddingEventListeners, |  | ||||||
|     setupEmbeddingBackgroundProcessing: events.setupEmbeddingBackgroundProcessing, |  | ||||||
|     stopEmbeddingBackgroundProcessing: events.stopEmbeddingBackgroundProcessing, |  | ||||||
|     initEmbeddings: events.initEmbeddings, |  | ||||||
|  |  | ||||||
|     // Stats and maintenance |  | ||||||
|     getEmbeddingStats: stats.getEmbeddingStats, |  | ||||||
|     cleanupEmbeddings: stats.cleanupEmbeddings, |  | ||||||
|  |  | ||||||
|     // Index operations |  | ||||||
|     rebuildSearchIndex: indexOperations.rebuildSearchIndex |  | ||||||
| }; |  | ||||||
| @@ -1,107 +0,0 @@ | |||||||
| import sql from "../../../services/sql.js"; |  | ||||||
| import log from "../../../services/log.js"; |  | ||||||
| import dateUtils from "../../../services/date_utils.js"; |  | ||||||
| import { bufferToEmbedding } from "./vector_utils.js"; |  | ||||||
| import indexService from "../index_service.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Rebuilds the search index structure without regenerating embeddings. |  | ||||||
|  * This optimizes the existing embeddings for faster searches. |  | ||||||
|  * |  | ||||||
|  * @returns The number of embeddings processed |  | ||||||
|  */ |  | ||||||
| export async function rebuildSearchIndex(): Promise<number> { |  | ||||||
|     log.info("Starting search index rebuild"); |  | ||||||
|     const startTime = Date.now(); |  | ||||||
|  |  | ||||||
|     try { |  | ||||||
|         // 1. Get count of all existing embeddings to track progress |  | ||||||
|         const totalEmbeddings = await sql.getValue( |  | ||||||
|             "SELECT COUNT(*) FROM note_embeddings" |  | ||||||
|         ) as number; |  | ||||||
|  |  | ||||||
|         if (totalEmbeddings === 0) { |  | ||||||
|             log.info("No embeddings found to rebuild index for"); |  | ||||||
|             return 0; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         log.info(`Found ${totalEmbeddings} embeddings to process`); |  | ||||||
|  |  | ||||||
|         // 2. Process embeddings in batches to avoid memory issues |  | ||||||
|         const batchSize = 100; |  | ||||||
|         let processed = 0; |  | ||||||
|  |  | ||||||
|         // Get unique provider/model combinations |  | ||||||
|         const providerModels = await sql.getRows( |  | ||||||
|             "SELECT DISTINCT providerId, modelId FROM note_embeddings" |  | ||||||
|         ) as {providerId: string, modelId: string}[]; |  | ||||||
|  |  | ||||||
|         // Process each provider/model combination |  | ||||||
|         for (const {providerId, modelId} of providerModels) { |  | ||||||
|             log.info(`Processing embeddings for provider: ${providerId}, model: ${modelId}`); |  | ||||||
|  |  | ||||||
|             // Get embeddings for this provider/model in batches |  | ||||||
|             let offset = 0; |  | ||||||
|             while (true) { |  | ||||||
|                 const embeddings = await sql.getRows(` |  | ||||||
|                     SELECT embedId, noteId, dimension, embedding, dateModified |  | ||||||
|                     FROM note_embeddings |  | ||||||
|                     WHERE providerId = ? AND modelId = ? |  | ||||||
|                     ORDER BY noteId |  | ||||||
|                     LIMIT ? OFFSET ?`, |  | ||||||
|                     [providerId, modelId, batchSize, offset] |  | ||||||
|                 ) as any[]; |  | ||||||
|  |  | ||||||
|                 if (embeddings.length === 0) { |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Process this batch of embeddings |  | ||||||
|                 for (const embedding of embeddings) { |  | ||||||
|                     try { |  | ||||||
|                         // Convert buffer to embedding for processing |  | ||||||
|                         const vector = bufferToEmbedding(embedding.embedding, embedding.dimension); |  | ||||||
|  |  | ||||||
|                         // Optimize this embedding (in a real system, this might involve: |  | ||||||
|                         // - Adding to an optimized index structure |  | ||||||
|                         // - Normalizing vectors |  | ||||||
|                         // - Updating index metadata |  | ||||||
|                         // For this implementation, we'll just "touch" the record to simulate optimization) |  | ||||||
|                         await sql.execute(` |  | ||||||
|                             UPDATE note_embeddings |  | ||||||
|                             SET dateModified = ?, utcDateModified = ? |  | ||||||
|                             WHERE embedId = ?`, |  | ||||||
|                             [dateUtils.localNowDateTime(), dateUtils.utcNowDateTime(), embedding.embedId] |  | ||||||
|                         ); |  | ||||||
|  |  | ||||||
|                         processed++; |  | ||||||
|  |  | ||||||
|                         // Update progress every 10 embeddings |  | ||||||
|                         if (processed % 10 === 0) { |  | ||||||
|                             indexService.updateIndexRebuildProgress(10); |  | ||||||
|  |  | ||||||
|                             // Log progress every 100 embeddings |  | ||||||
|                             if (processed % 100 === 0) { |  | ||||||
|                                 const percent = Math.round((processed / totalEmbeddings) * 100); |  | ||||||
|                                 log.info(`Index rebuild progress: ${percent}% (${processed}/${totalEmbeddings})`); |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|                     } catch (error: any) { |  | ||||||
|                         log.error(`Error processing embedding ${embedding.embedId}: ${error.message || "Unknown error"}`); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 offset += embeddings.length; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // 3. Finalize - could involve additional optimization steps |  | ||||||
|         const duration = Math.round((Date.now() - startTime) / 1000); |  | ||||||
|         log.info(`Index rebuild completed: processed ${processed} embeddings in ${duration} seconds`); |  | ||||||
|  |  | ||||||
|         return processed; |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error during index rebuild: ${error.message || "Unknown error"}`); |  | ||||||
|         throw error; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,67 +0,0 @@ | |||||||
| import log from "../../log.js"; |  | ||||||
| import options from "../../options.js"; |  | ||||||
| import { initEmbeddings } from "./index.js"; |  | ||||||
| import providerManager from "../providers/providers.js"; |  | ||||||
| import sqlInit from "../../sql_init.js"; |  | ||||||
| import sql from "../../sql.js"; |  | ||||||
| import { validateProviders, logValidationResults, hasWorkingEmbeddingProviders } from "../provider_validation.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Reset any stuck embedding queue items that were left in processing state |  | ||||||
|  * from a previous server shutdown |  | ||||||
|  */ |  | ||||||
| async function resetStuckEmbeddingQueue() { |  | ||||||
|     try { |  | ||||||
|         const stuckCount = await sql.getValue( |  | ||||||
|             "SELECT COUNT(*) FROM embedding_queue WHERE isProcessing = 1" |  | ||||||
|         ) as number; |  | ||||||
|  |  | ||||||
|         if (stuckCount > 0) { |  | ||||||
|             log.info(`Resetting ${stuckCount} stuck items in embedding queue from previous shutdown`); |  | ||||||
|  |  | ||||||
|             await sql.execute( |  | ||||||
|                 "UPDATE embedding_queue SET isProcessing = 0 WHERE isProcessing = 1" |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error resetting stuck embedding queue: ${error.message || error}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Initialize the embedding system |  | ||||||
|  */ |  | ||||||
| export async function initializeEmbeddings() { |  | ||||||
|     try { |  | ||||||
|         log.info("Initializing embedding system..."); |  | ||||||
|  |  | ||||||
|         // Check if the database is initialized before proceeding |  | ||||||
|         if (!sqlInit.isDbInitialized()) { |  | ||||||
|             log.info("Skipping embedding system initialization as database is not initialized yet."); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Reset any stuck embedding queue items from previous server shutdown |  | ||||||
|         await resetStuckEmbeddingQueue(); |  | ||||||
|  |  | ||||||
|         // Start the embedding system if AI is enabled |  | ||||||
|         if (await options.getOptionBool('aiEnabled')) { |  | ||||||
|             // Validate providers before starting the embedding system |  | ||||||
|             log.info("Validating AI providers before starting embedding system..."); |  | ||||||
|             const validation = await validateProviders(); |  | ||||||
|             logValidationResults(validation); |  | ||||||
|              |  | ||||||
|             if (await hasWorkingEmbeddingProviders()) { |  | ||||||
|                 // Embedding providers will be created on-demand when needed |  | ||||||
|                 await initEmbeddings(); |  | ||||||
|                 log.info("Embedding system initialized successfully."); |  | ||||||
|             } else { |  | ||||||
|                 log.info("Embedding system not started: No working embedding providers found. Please configure at least one AI provider (OpenAI, Ollama, or Voyage) to use embedding features."); |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             log.info("Embedding system disabled (AI features are turned off)."); |  | ||||||
|         } |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Error initializing embedding system: ${error.message || error}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,73 +0,0 @@ | |||||||
| import { BaseEmbeddingProvider } from "../base_embeddings.js"; |  | ||||||
| import type { EmbeddingConfig } from "../embeddings_interface.js"; |  | ||||||
| import crypto from "crypto"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Local embedding provider implementation |  | ||||||
|  * |  | ||||||
|  * This is a fallback provider that generates simple deterministic embeddings |  | ||||||
|  * using cryptographic hashing. These are not semantic vectors but can be used |  | ||||||
|  * for exact matches when no other providers are available. |  | ||||||
|  */ |  | ||||||
| export class LocalEmbeddingProvider extends BaseEmbeddingProvider { |  | ||||||
|     override name = "local"; |  | ||||||
|  |  | ||||||
|     constructor(config: EmbeddingConfig) { |  | ||||||
|         super(config); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate a simple embedding by hashing the text |  | ||||||
|      */ |  | ||||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { |  | ||||||
|         const dimension = this.config.dimension || 384; |  | ||||||
|         const result = new Float32Array(dimension); |  | ||||||
|  |  | ||||||
|         // Generate a hash of the input text |  | ||||||
|         const hash = crypto.createHash('sha256').update(text).digest(); |  | ||||||
|  |  | ||||||
|         // Use the hash to seed a deterministic PRNG |  | ||||||
|         let seed = 0; |  | ||||||
|         for (let i = 0; i < hash.length; i += 4) { |  | ||||||
|             seed = (seed * 65536 + hash.readUInt32LE(i % (hash.length - 3))) >>> 0; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Generate pseudo-random but deterministic values for the embedding |  | ||||||
|         for (let i = 0; i < dimension; i++) { |  | ||||||
|             // Generate next pseudo-random number |  | ||||||
|             seed = (seed * 1664525 + 1013904223) >>> 0; |  | ||||||
|  |  | ||||||
|             // Convert to a float between -1 and 1 |  | ||||||
|             result[i] = (seed / 2147483648) - 1; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Normalize the vector |  | ||||||
|         let magnitude = 0; |  | ||||||
|         for (let i = 0; i < dimension; i++) { |  | ||||||
|             magnitude += result[i] * result[i]; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         magnitude = Math.sqrt(magnitude); |  | ||||||
|         if (magnitude > 0) { |  | ||||||
|             for (let i = 0; i < dimension; i++) { |  | ||||||
|                 result[i] /= magnitude; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return result; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple texts |  | ||||||
|      */ |  | ||||||
|     override async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         const results: Float32Array[] = []; |  | ||||||
|  |  | ||||||
|         for (const text of texts) { |  | ||||||
|             const embedding = await this.generateEmbeddings(text); |  | ||||||
|             results.push(embedding); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,324 +0,0 @@ | |||||||
| import log from "../../../log.js"; |  | ||||||
| import { BaseEmbeddingProvider } from "../base_embeddings.js"; |  | ||||||
| import type { EmbeddingConfig } from "../embeddings_interface.js"; |  | ||||||
| import { NormalizationStatus } from "../embeddings_interface.js"; |  | ||||||
| import { LLM_CONSTANTS } from "../../constants/provider_constants.js"; |  | ||||||
| import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js"; |  | ||||||
| import { Ollama } from "ollama"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Ollama embedding provider implementation using the official Ollama client |  | ||||||
|  */ |  | ||||||
| export class OllamaEmbeddingProvider extends BaseEmbeddingProvider { |  | ||||||
|     override name = "ollama"; |  | ||||||
|     private client: Ollama | null = null; |  | ||||||
|  |  | ||||||
|     constructor(config: EmbeddingConfig) { |  | ||||||
|         super(config); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the Ollama client instance |  | ||||||
|      */ |  | ||||||
|     private getClient(): Ollama { |  | ||||||
|         if (!this.client) { |  | ||||||
|             this.client = new Ollama({ host: this.baseUrl }); |  | ||||||
|         } |  | ||||||
|         return this.client; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Initialize the provider by detecting model capabilities |  | ||||||
|      */ |  | ||||||
|     override async initialize(): Promise<void> { |  | ||||||
|         const modelName = this.config.model || "llama3"; |  | ||||||
|         try { |  | ||||||
|             // Detect model capabilities |  | ||||||
|             const modelInfo = await this.getModelInfo(modelName); |  | ||||||
|  |  | ||||||
|             // Update the config dimension |  | ||||||
|             this.config.dimension = modelInfo.dimension; |  | ||||||
|  |  | ||||||
|             log.info(`Ollama model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWidth}`); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error initializing Ollama provider: ${error.message}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Fetch detailed model information from Ollama API |  | ||||||
|      * @param modelName The name of the model to fetch information for |  | ||||||
|      */ |  | ||||||
|     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> { |  | ||||||
|         try { |  | ||||||
|             const client = this.getClient(); |  | ||||||
|  |  | ||||||
|             // Get model info using the client's show method |  | ||||||
|             const modelData = await client.show({ model: modelName }); |  | ||||||
|  |  | ||||||
|             if (modelData && modelData.parameters) { |  | ||||||
|                 const params = modelData.parameters as any; |  | ||||||
|                 // Extract context length from parameters (different models might use different parameter names) |  | ||||||
|                 const contextWindow = params.context_length || |  | ||||||
|                                      params.num_ctx || |  | ||||||
|                                      params.context_window || |  | ||||||
|                                      (LLM_CONSTANTS.OLLAMA_MODEL_CONTEXT_WINDOWS as Record<string, number>).default; |  | ||||||
|  |  | ||||||
|                 // Some models might provide embedding dimensions |  | ||||||
|                 const embeddingDimension = params.embedding_length || params.dim || null; |  | ||||||
|  |  | ||||||
|                 log.info(`Fetched Ollama model info for ${modelName}: context window ${contextWindow}`); |  | ||||||
|  |  | ||||||
|                 return { |  | ||||||
|                     name: modelName, |  | ||||||
|                     dimension: embeddingDimension || 0, // We'll detect this separately if not provided |  | ||||||
|                     contextWidth: contextWindow, |  | ||||||
|                     type: 'float32' |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.info(`Could not fetch model info from Ollama API: ${error.message}. Will try embedding test.`); |  | ||||||
|             // We'll fall back to embedding test if this fails |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return null; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get model information by probing the API |  | ||||||
|      */ |  | ||||||
|     async getModelInfo(modelName: string): Promise<EmbeddingModelInfo> { |  | ||||||
|         // Check cache first |  | ||||||
|         if (this.modelInfoCache.has(modelName)) { |  | ||||||
|             return this.modelInfoCache.get(modelName)!; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Try to fetch model capabilities from API |  | ||||||
|         const apiModelInfo = await this.fetchModelCapabilities(modelName); |  | ||||||
|         if (apiModelInfo) { |  | ||||||
|             // If we have context window but no embedding dimension, we need to detect the dimension |  | ||||||
|             if (apiModelInfo.contextWidth && !apiModelInfo.dimension) { |  | ||||||
|                 try { |  | ||||||
|                     // Detect dimension with a test embedding |  | ||||||
|                     const dimension = await this.detectEmbeddingDimension(modelName); |  | ||||||
|                     apiModelInfo.dimension = dimension; |  | ||||||
|                 } catch (error) { |  | ||||||
|                     // If dimension detection fails, fall back to defaults |  | ||||||
|                     const baseModelName = modelName.split(':')[0]; |  | ||||||
|                     apiModelInfo.dimension = (LLM_CONSTANTS.OLLAMA_MODEL_DIMENSIONS as Record<string, number>)[baseModelName] || |  | ||||||
|                                            (LLM_CONSTANTS.OLLAMA_MODEL_DIMENSIONS as Record<string, number>).default; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Cache and return the API-provided info |  | ||||||
|             this.modelInfoCache.set(modelName, apiModelInfo); |  | ||||||
|             this.config.dimension = apiModelInfo.dimension; |  | ||||||
|             return apiModelInfo; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If API info fetch fails, fall back to test embedding |  | ||||||
|         try { |  | ||||||
|             const dimension = await this.detectEmbeddingDimension(modelName); |  | ||||||
|             const baseModelName = modelName.split(':')[0]; |  | ||||||
|             const contextWindow = (LLM_CONSTANTS.OLLAMA_MODEL_CONTEXT_WINDOWS as Record<string, number>)[baseModelName] || |  | ||||||
|                                 (LLM_CONSTANTS.OLLAMA_MODEL_CONTEXT_WINDOWS as Record<string, number>).default; |  | ||||||
|  |  | ||||||
|             const modelInfo: EmbeddingModelInfo = { |  | ||||||
|                 name: modelName, |  | ||||||
|                 dimension, |  | ||||||
|                 contextWidth: contextWindow, |  | ||||||
|                 type: 'float32' |  | ||||||
|             }; |  | ||||||
|             this.modelInfoCache.set(modelName, modelInfo); |  | ||||||
|             this.config.dimension = dimension; |  | ||||||
|  |  | ||||||
|             log.info(`Detected Ollama model ${modelName} with dimension ${dimension} (context: ${contextWindow})`); |  | ||||||
|             return modelInfo; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error detecting Ollama model capabilities: ${error.message}`); |  | ||||||
|  |  | ||||||
|             // If all detection fails, use defaults based on model name |  | ||||||
|             const baseModelName = modelName.split(':')[0]; |  | ||||||
|             const dimension = (LLM_CONSTANTS.OLLAMA_MODEL_DIMENSIONS as Record<string, number>)[baseModelName] || |  | ||||||
|                             (LLM_CONSTANTS.OLLAMA_MODEL_DIMENSIONS as Record<string, number>).default; |  | ||||||
|             const contextWindow = (LLM_CONSTANTS.OLLAMA_MODEL_CONTEXT_WINDOWS as Record<string, number>)[baseModelName] || |  | ||||||
|                                 (LLM_CONSTANTS.OLLAMA_MODEL_CONTEXT_WINDOWS as Record<string, number>).default; |  | ||||||
|  |  | ||||||
|             log.info(`Using default parameters for model ${modelName}: dimension ${dimension}, context ${contextWindow}`); |  | ||||||
|  |  | ||||||
|             const modelInfo: EmbeddingModelInfo = { |  | ||||||
|                 name: modelName, |  | ||||||
|                 dimension, |  | ||||||
|                 contextWidth: contextWindow, |  | ||||||
|                 type: 'float32' |  | ||||||
|             }; |  | ||||||
|             this.modelInfoCache.set(modelName, modelInfo); |  | ||||||
|             this.config.dimension = dimension; |  | ||||||
|  |  | ||||||
|             return modelInfo; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Detect embedding dimension by making a test API call |  | ||||||
|      */ |  | ||||||
|     private async detectEmbeddingDimension(modelName: string): Promise<number> { |  | ||||||
|         try { |  | ||||||
|             const client = this.getClient(); |  | ||||||
|             const embedResponse = await client.embeddings({ |  | ||||||
|                 model: modelName, |  | ||||||
|                 prompt: "Test" |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             if (embedResponse && Array.isArray(embedResponse.embedding)) { |  | ||||||
|                 return embedResponse.embedding.length; |  | ||||||
|             } else { |  | ||||||
|                 throw new Error("Could not detect embedding dimensions"); |  | ||||||
|             } |  | ||||||
|         } catch (error) { |  | ||||||
|             throw new Error(`Failed to detect embedding dimensions: ${error}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the current embedding dimension |  | ||||||
|      */ |  | ||||||
|     override getDimension(): number { |  | ||||||
|         return this.config.dimension; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a single text |  | ||||||
|      */ |  | ||||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { |  | ||||||
|         // Handle empty text |  | ||||||
|         if (!text.trim()) { |  | ||||||
|             return new Float32Array(this.config.dimension); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Configuration for retries |  | ||||||
|         const maxRetries = 3; |  | ||||||
|         let retryCount = 0; |  | ||||||
|         let lastError: any = null; |  | ||||||
|  |  | ||||||
|         while (retryCount <= maxRetries) { |  | ||||||
|             try { |  | ||||||
|                 const modelName = this.config.model || "llama3"; |  | ||||||
|  |  | ||||||
|                 // Ensure we have model info |  | ||||||
|                 const modelInfo = await this.getModelInfo(modelName); |  | ||||||
|  |  | ||||||
|                 // Trim text if it might exceed context window (rough character estimate) |  | ||||||
|                 // This is a simplistic approach - ideally we'd count tokens properly |  | ||||||
|                 const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token |  | ||||||
|                 const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text; |  | ||||||
|  |  | ||||||
|                 const client = this.getClient(); |  | ||||||
|                 const response = await client.embeddings({ |  | ||||||
|                     model: modelName, |  | ||||||
|                     prompt: trimmedText |  | ||||||
|                 }); |  | ||||||
|  |  | ||||||
|                 if (response && Array.isArray(response.embedding)) { |  | ||||||
|                     // Success! Return the embedding |  | ||||||
|                     return new Float32Array(response.embedding); |  | ||||||
|                 } else { |  | ||||||
|                     throw new Error("Unexpected response structure from Ollama API"); |  | ||||||
|                 } |  | ||||||
|             } catch (error: any) { |  | ||||||
|                 lastError = error; |  | ||||||
|                 // Only retry on timeout or connection errors |  | ||||||
|                 const errorMessage = error.message || "Unknown error"; |  | ||||||
|                 const isTimeoutError = errorMessage.includes('timeout') || |  | ||||||
|                                      errorMessage.includes('socket hang up') || |  | ||||||
|                                      errorMessage.includes('ECONNREFUSED') || |  | ||||||
|                                      errorMessage.includes('ECONNRESET') || |  | ||||||
|                                      errorMessage.includes('AbortError') || |  | ||||||
|                                      errorMessage.includes('NetworkError'); |  | ||||||
|  |  | ||||||
|                 if (isTimeoutError && retryCount < maxRetries) { |  | ||||||
|                     // Exponential backoff with jitter |  | ||||||
|                     const delay = Math.min(Math.pow(2, retryCount) * 1000 + Math.random() * 1000, 15000); |  | ||||||
|                     log.info(`Ollama embedding timeout, retrying in ${Math.round(delay/1000)}s (attempt ${retryCount + 1}/${maxRetries})`); |  | ||||||
|                     await new Promise(resolve => setTimeout(resolve, delay)); |  | ||||||
|                     retryCount++; |  | ||||||
|                 } else { |  | ||||||
|                     // Non-retryable error or max retries exceeded |  | ||||||
|                     const errorMessage = error.message || "Unknown error"; |  | ||||||
|                     log.error(`Ollama embedding error: ${errorMessage}`); |  | ||||||
|                     throw new Error(`Ollama embedding error: ${errorMessage}`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If we get here, we've exceeded our retry limit |  | ||||||
|         const errorMessage = lastError.message || "Unknown error"; |  | ||||||
|         log.error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`); |  | ||||||
|         throw new Error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * More specific implementation of batch size error detection for Ollama |  | ||||||
|      */ |  | ||||||
|     protected override isBatchSizeError(error: any): boolean { |  | ||||||
|         const errorMessage = error?.message || ''; |  | ||||||
|         const ollamaBatchSizeErrorPatterns = [ |  | ||||||
|             'context length', 'token limit', 'out of memory', |  | ||||||
|             'too large', 'overloaded', 'prompt too long', |  | ||||||
|             'too many tokens', 'maximum size' |  | ||||||
|         ]; |  | ||||||
|  |  | ||||||
|         return ollamaBatchSizeErrorPatterns.some(pattern => |  | ||||||
|             errorMessage.toLowerCase().includes(pattern.toLowerCase()) |  | ||||||
|         ); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple texts |  | ||||||
|      * |  | ||||||
|      * Note: Ollama API doesn't support batch embedding, so we process them sequentially |  | ||||||
|      * but using the adaptive batch processor to handle rate limits and retries |  | ||||||
|      */ |  | ||||||
|     override async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         if (texts.length === 0) { |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await this.processWithAdaptiveBatch( |  | ||||||
|                 texts, |  | ||||||
|                 async (batch) => { |  | ||||||
|                     const results: Float32Array[] = []; |  | ||||||
|  |  | ||||||
|                     // For Ollama, we have to process one at a time |  | ||||||
|                     for (const text of batch) { |  | ||||||
|                         // Skip empty texts |  | ||||||
|                         if (!text.trim()) { |  | ||||||
|                             results.push(new Float32Array(this.config.dimension)); |  | ||||||
|                             continue; |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         const embedding = await this.generateEmbeddings(text); |  | ||||||
|                         results.push(embedding); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     return results; |  | ||||||
|                 }, |  | ||||||
|                 this.isBatchSizeError |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|         catch (error: any) { |  | ||||||
|             const errorMessage = error.message || "Unknown error"; |  | ||||||
|             log.error(`Ollama batch embedding error: ${errorMessage}`); |  | ||||||
|             throw new Error(`Ollama batch embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Returns the normalization status for Ollama embeddings |  | ||||||
|      * Ollama embeddings are not guaranteed to be normalized |  | ||||||
|      */ |  | ||||||
|     override getNormalizationStatus(): NormalizationStatus { |  | ||||||
|         return NormalizationStatus.NEVER; // Be conservative and always normalize |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,318 +0,0 @@ | |||||||
| import log from "../../../log.js"; |  | ||||||
| import { BaseEmbeddingProvider } from "../base_embeddings.js"; |  | ||||||
| import type { EmbeddingConfig } from "../embeddings_interface.js"; |  | ||||||
| import { NormalizationStatus } from "../embeddings_interface.js"; |  | ||||||
| import { LLM_CONSTANTS } from "../../constants/provider_constants.js"; |  | ||||||
| import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js"; |  | ||||||
| import OpenAI from "openai"; |  | ||||||
| import { PROVIDER_EMBEDDING_CAPABILITIES } from '../../constants/search_constants.js'; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * OpenAI embedding provider implementation using the official SDK |  | ||||||
|  */ |  | ||||||
| export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider { |  | ||||||
|     override name = "openai"; |  | ||||||
|     private client: OpenAI | null = null; |  | ||||||
|  |  | ||||||
|     constructor(config: EmbeddingConfig) { |  | ||||||
|         super(config); |  | ||||||
|         this.initClient(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Initialize the OpenAI client |  | ||||||
|      */ |  | ||||||
|     private initClient() { |  | ||||||
|         if (this.apiKey) { |  | ||||||
|             this.client = new OpenAI({ |  | ||||||
|                 apiKey: this.apiKey, |  | ||||||
|                 baseURL: this.baseUrl |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Initialize the provider by detecting model capabilities |  | ||||||
|      */ |  | ||||||
|     override async initialize(): Promise<void> { |  | ||||||
|         const modelName = this.config.model || "text-embedding-3-small"; |  | ||||||
|         try { |  | ||||||
|             // Initialize client if needed |  | ||||||
|             if (!this.client && this.apiKey) { |  | ||||||
|                 this.initClient(); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Detect model capabilities |  | ||||||
|             const modelInfo = await this.getModelInfo(modelName); |  | ||||||
|  |  | ||||||
|             // Update the config dimension |  | ||||||
|             this.config.dimension = modelInfo.dimension; |  | ||||||
|  |  | ||||||
|             log.info(`OpenAI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWidth}`); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error initializing OpenAI provider: ${error.message}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Fetch model information from the OpenAI API |  | ||||||
|      */ |  | ||||||
|     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> { |  | ||||||
|         if (!this.client) { |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Get model details using the SDK |  | ||||||
|             const model = await this.client.models.retrieve(modelName); |  | ||||||
|  |  | ||||||
|             if (model) { |  | ||||||
|                 // Different model families may have different ways of exposing context window |  | ||||||
|                 let contextWindow = 0; |  | ||||||
|                 let dimension = 0; |  | ||||||
|  |  | ||||||
|                 // Extract context window if available from the response |  | ||||||
|                 const modelData = model as any; |  | ||||||
|  |  | ||||||
|                 if (modelData.context_window) { |  | ||||||
|                     contextWindow = modelData.context_window; |  | ||||||
|                 } else if (modelData.limits && modelData.limits.context_window) { |  | ||||||
|                     contextWindow = modelData.limits.context_window; |  | ||||||
|                 } else if (modelData.limits && modelData.limits.context_length) { |  | ||||||
|                     contextWindow = modelData.limits.context_length; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Extract embedding dimensions if available |  | ||||||
|                 if (modelData.dimensions) { |  | ||||||
|                     dimension = modelData.dimensions; |  | ||||||
|                 } else if (modelData.embedding_dimension) { |  | ||||||
|                     dimension = modelData.embedding_dimension; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // If we didn't get all the info, use defaults for missing values |  | ||||||
|                 if (!contextWindow) { |  | ||||||
|                     // Set contextWindow based on model name patterns |  | ||||||
|                     if (modelName.includes('embedding-3')) { |  | ||||||
|                         contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS['text-embedding-3-small'].contextWindow; |  | ||||||
|                     } else { |  | ||||||
|                         contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.contextWindow; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 if (!dimension) { |  | ||||||
|                     // Set default dimensions based on model name patterns |  | ||||||
|                     if (modelName.includes('ada') || modelName.includes('embedding-ada')) { |  | ||||||
|                         dimension = LLM_CONSTANTS.EMBEDDING_DIMENSIONS.OPENAI.ADA; |  | ||||||
|                     } else if (modelName.includes('embedding-3-small')) { |  | ||||||
|                         dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS['text-embedding-3-small'].dimension; |  | ||||||
|                     } else if (modelName.includes('embedding-3-large')) { |  | ||||||
|                         dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS['text-embedding-3-large'].dimension; |  | ||||||
|                     } else { |  | ||||||
|                         dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.dimension; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 log.info(`Fetched OpenAI model info for ${modelName}: context window ${contextWindow}, dimension ${dimension}`); |  | ||||||
|  |  | ||||||
|                 return { |  | ||||||
|                     name: modelName, |  | ||||||
|                     dimension, |  | ||||||
|                     contextWidth: contextWindow, |  | ||||||
|                     type: 'float32' |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.info(`Could not fetch model info from OpenAI API: ${error.message}. Will try embedding test.`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return null; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get model information including embedding dimensions |  | ||||||
|      */ |  | ||||||
|     async getModelInfo(modelName: string): Promise<EmbeddingModelInfo> { |  | ||||||
|         // Check cache first |  | ||||||
|         if (this.modelInfoCache.has(modelName)) { |  | ||||||
|             return this.modelInfoCache.get(modelName)!; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Try to fetch model capabilities from API |  | ||||||
|         const apiModelInfo = await this.fetchModelCapabilities(modelName); |  | ||||||
|         if (apiModelInfo) { |  | ||||||
|             // Cache and return the API-provided info |  | ||||||
|             this.modelInfoCache.set(modelName, apiModelInfo); |  | ||||||
|             this.config.dimension = apiModelInfo.dimension; |  | ||||||
|             return apiModelInfo; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If API info fetch fails, try to detect embedding dimension with a test call |  | ||||||
|         try { |  | ||||||
|             const testEmbedding = await this.generateEmbeddings("Test"); |  | ||||||
|             const dimension = testEmbedding.length; |  | ||||||
|  |  | ||||||
|             // Use default context window |  | ||||||
|             let contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.contextWindow; |  | ||||||
|  |  | ||||||
|             const modelInfo: EmbeddingModelInfo = { |  | ||||||
|                 name: modelName, |  | ||||||
|                 dimension, |  | ||||||
|                 contextWidth: contextWindow, |  | ||||||
|                 type: 'float32' |  | ||||||
|             }; |  | ||||||
|             this.modelInfoCache.set(modelName, modelInfo); |  | ||||||
|             this.config.dimension = dimension; |  | ||||||
|  |  | ||||||
|             log.info(`Detected OpenAI model ${modelName} with dimension ${dimension} (context: ${contextWindow})`); |  | ||||||
|             return modelInfo; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             // If detection fails, use defaults |  | ||||||
|             const dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.dimension; |  | ||||||
|             const contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.contextWindow; |  | ||||||
|  |  | ||||||
|             log.info(`Using default parameters for OpenAI model ${modelName}: dimension ${dimension}, context ${contextWindow}`); |  | ||||||
|  |  | ||||||
|             const modelInfo: EmbeddingModelInfo = { |  | ||||||
|                 name: modelName, |  | ||||||
|                 dimension, |  | ||||||
|                 contextWidth: contextWindow, |  | ||||||
|                 type: 'float32' |  | ||||||
|             }; |  | ||||||
|             this.modelInfoCache.set(modelName, modelInfo); |  | ||||||
|             this.config.dimension = dimension; |  | ||||||
|  |  | ||||||
|             return modelInfo; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a single text |  | ||||||
|      */ |  | ||||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { |  | ||||||
|         try { |  | ||||||
|             if (!text.trim()) { |  | ||||||
|                 return new Float32Array(this.config.dimension); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (!this.client) { |  | ||||||
|                 this.initClient(); |  | ||||||
|                 if (!this.client) { |  | ||||||
|                     throw new Error("OpenAI client initialization failed"); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const response = await this.client.embeddings.create({ |  | ||||||
|                 model: this.config.model || "text-embedding-3-small", |  | ||||||
|                 input: text, |  | ||||||
|                 encoding_format: "float" |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             if (response && response.data && response.data[0] && response.data[0].embedding) { |  | ||||||
|                 return new Float32Array(response.data[0].embedding); |  | ||||||
|             } else { |  | ||||||
|                 throw new Error("Unexpected response structure from OpenAI API"); |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             const errorMessage = error.message || "Unknown error"; |  | ||||||
|             log.error(`OpenAI embedding error: ${errorMessage}`); |  | ||||||
|             throw new Error(`OpenAI embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * More specific implementation of batch size error detection for OpenAI |  | ||||||
|      */ |  | ||||||
|     protected override isBatchSizeError(error: any): boolean { |  | ||||||
|         const errorMessage = error?.message || ''; |  | ||||||
|         const openAIBatchSizeErrorPatterns = [ |  | ||||||
|             'batch size', 'too many inputs', 'context length exceeded', |  | ||||||
|             'maximum context length', 'token limit', 'rate limit exceeded', |  | ||||||
|             'tokens in the messages', 'reduce the length', 'too long' |  | ||||||
|         ]; |  | ||||||
|  |  | ||||||
|         return openAIBatchSizeErrorPatterns.some(pattern => |  | ||||||
|             errorMessage.toLowerCase().includes(pattern.toLowerCase()) |  | ||||||
|         ); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Custom implementation for batched OpenAI embeddings |  | ||||||
|      */ |  | ||||||
|     async generateBatchEmbeddingsWithAPI(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         if (texts.length === 0) { |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!this.client) { |  | ||||||
|             this.initClient(); |  | ||||||
|             if (!this.client) { |  | ||||||
|                 throw new Error("OpenAI client initialization failed"); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const response = await this.client.embeddings.create({ |  | ||||||
|             model: this.config.model || "text-embedding-3-small", |  | ||||||
|             input: texts, |  | ||||||
|             encoding_format: "float" |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         if (response && response.data) { |  | ||||||
|             // Sort the embeddings by index to ensure they match the input order |  | ||||||
|             const sortedEmbeddings = response.data |  | ||||||
|                 .sort((a, b) => a.index - b.index) |  | ||||||
|                 .map(item => new Float32Array(item.embedding)); |  | ||||||
|  |  | ||||||
|             return sortedEmbeddings; |  | ||||||
|         } else { |  | ||||||
|             throw new Error("Unexpected response structure from OpenAI API"); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple texts in a single batch |  | ||||||
|      * OpenAI API supports batch embedding, so we implement a custom version |  | ||||||
|      */ |  | ||||||
|     override async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         if (texts.length === 0) { |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await this.processWithAdaptiveBatch( |  | ||||||
|                 texts, |  | ||||||
|                 async (batch) => { |  | ||||||
|                     // Filter out empty texts and use the API batch functionality |  | ||||||
|                     const filteredBatch = batch.filter(text => text.trim().length > 0); |  | ||||||
|  |  | ||||||
|                     if (filteredBatch.length === 0) { |  | ||||||
|                         // If all texts are empty after filtering, return empty embeddings |  | ||||||
|                         return batch.map(() => new Float32Array(this.config.dimension)); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     if (filteredBatch.length === 1) { |  | ||||||
|                         // If only one text, use the single embedding endpoint |  | ||||||
|                         const embedding = await this.generateEmbeddings(filteredBatch[0]); |  | ||||||
|                         return [embedding]; |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Use the batch API endpoint |  | ||||||
|                     return this.generateBatchEmbeddingsWithAPI(filteredBatch); |  | ||||||
|                 }, |  | ||||||
|                 this.isBatchSizeError |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|         catch (error: any) { |  | ||||||
|             const errorMessage = error.message || "Unknown error"; |  | ||||||
|             log.error(`OpenAI batch embedding error: ${errorMessage}`); |  | ||||||
|             throw new Error(`OpenAI batch embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
    /**
     * Returns the normalization status for OpenAI embeddings.
     * OpenAI embeddings are guaranteed to be normalized to unit length,
     * so callers do not need to normalize them before similarity math.
     */
    override getNormalizationStatus(): NormalizationStatus {
        return NormalizationStatus.GUARANTEED;
    }
| } |  | ||||||
| @@ -1,285 +0,0 @@ | |||||||
| import log from "../../../log.js"; |  | ||||||
| import { BaseEmbeddingProvider } from "../base_embeddings.js"; |  | ||||||
| import type { EmbeddingConfig } from "../embeddings_interface.js"; |  | ||||||
| import { NormalizationStatus } from "../embeddings_interface.js"; |  | ||||||
| import { LLM_CONSTANTS } from "../../constants/provider_constants.js"; |  | ||||||
| import { PROVIDER_EMBEDDING_CAPABILITIES } from "../../constants/search_constants.js"; |  | ||||||
| import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js"; |  | ||||||
|  |  | ||||||
// Use constants from the central constants file
// NOTE(review): neither constant below appears to be referenced by the
// provider class in this file — candidates for removal; confirm there are no
// other consumers before deleting.
const VOYAGE_MODEL_CONTEXT_WINDOWS = PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS;
// Map of model name -> embedding dimension, derived from the capability table.
const VOYAGE_MODEL_DIMENSIONS = Object.entries(PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS).reduce((acc, [key, value]) => {
    acc[key] = value.dimension;
    return acc;
}, {} as Record<string, number>);
|  |  | ||||||
| /** |  | ||||||
|  * Voyage AI embedding provider implementation |  | ||||||
|  */ |  | ||||||
| export class VoyageEmbeddingProvider extends BaseEmbeddingProvider { |  | ||||||
|     override name = "voyage"; |  | ||||||
|  |  | ||||||
|     constructor(config: EmbeddingConfig) { |  | ||||||
|         super(config); |  | ||||||
|  |  | ||||||
|         // Set default base URL if not provided |  | ||||||
|         if (!this.baseUrl) { |  | ||||||
|             this.baseUrl = "https://api.voyageai.com/v1"; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Initialize the provider by detecting model capabilities |  | ||||||
|      */ |  | ||||||
|     override async initialize(): Promise<void> { |  | ||||||
|         const modelName = this.config.model || "voyage-2"; |  | ||||||
|         try { |  | ||||||
|             // Detect model capabilities |  | ||||||
|             const modelInfo = await this.getModelInfo(modelName); |  | ||||||
|  |  | ||||||
|             // Update the config dimension |  | ||||||
|             this.config.dimension = modelInfo.dimension; |  | ||||||
|  |  | ||||||
|             log.info(`Voyage AI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWidth}`); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error initializing Voyage AI provider: ${error.message}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Try to determine Voyage AI model capabilities |  | ||||||
|      */ |  | ||||||
|     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> { |  | ||||||
|         try { |  | ||||||
|             // Find the closest matching model |  | ||||||
|             const modelMapKey = Object.keys(PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS).find( |  | ||||||
|                 model => modelName.startsWith(model) |  | ||||||
|             ) || "default"; |  | ||||||
|  |  | ||||||
|             // Use as keyof to tell TypeScript this is a valid key |  | ||||||
|             const modelInfo = PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS[modelMapKey as keyof typeof PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS]; |  | ||||||
|  |  | ||||||
|             return { |  | ||||||
|                 dimension: modelInfo.dimension, |  | ||||||
|                 contextWidth: modelInfo.contextWidth, |  | ||||||
|                 name: modelName, |  | ||||||
|                 type: 'float32' |  | ||||||
|             }; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.info(`Could not determine capabilities for Voyage AI model ${modelName}: ${error}`); |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get model information including embedding dimensions |  | ||||||
|      */ |  | ||||||
|     async getModelInfo(modelName: string): Promise<EmbeddingModelInfo> { |  | ||||||
|         // Check cache first |  | ||||||
|         if (this.modelInfoCache.has(modelName)) { |  | ||||||
|             return this.modelInfoCache.get(modelName)!; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Try to determine model capabilities |  | ||||||
|         const capabilities = await this.fetchModelCapabilities(modelName); |  | ||||||
|         const defaults = PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS.default; |  | ||||||
|         const contextWindow = capabilities?.contextWidth || defaults.contextWidth; |  | ||||||
|         const knownDimension = capabilities?.dimension || defaults.dimension; |  | ||||||
|  |  | ||||||
|         // For Voyage, we can use known dimensions or detect with a test call |  | ||||||
|         try { |  | ||||||
|             if (knownDimension) { |  | ||||||
|                 // Use known dimension |  | ||||||
|                 const modelInfo: EmbeddingModelInfo = { |  | ||||||
|                     dimension: knownDimension, |  | ||||||
|                     contextWidth: contextWindow, |  | ||||||
|                     name: modelName, |  | ||||||
|                     type: 'float32' |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 this.modelInfoCache.set(modelName, modelInfo); |  | ||||||
|                 this.config.dimension = knownDimension; |  | ||||||
|  |  | ||||||
|                 log.info(`Using known parameters for Voyage AI model ${modelName}: dimension ${knownDimension}, context ${contextWindow}`); |  | ||||||
|                 return modelInfo; |  | ||||||
|             } else { |  | ||||||
|                 // Detect dimension with a test embedding as fallback |  | ||||||
|                 const testEmbedding = await this.generateEmbeddings("Test"); |  | ||||||
|                 const dimension = testEmbedding.length; |  | ||||||
|  |  | ||||||
|                 // Set model info based on the model name, detected dimension, and reasonable defaults |  | ||||||
|                 if (modelName.includes('voyage-2')) { |  | ||||||
|                     return { |  | ||||||
|                         dimension: dimension || 1024, |  | ||||||
|                         contextWidth: 8192, |  | ||||||
|                         name: modelName, |  | ||||||
|                         type: 'float32' |  | ||||||
|                     }; |  | ||||||
|                 } else if (modelName.includes('voyage-lite-02')) { |  | ||||||
|                     return { |  | ||||||
|                         dimension: dimension || 768, |  | ||||||
|                         contextWidth: 8192, |  | ||||||
|                         name: modelName, |  | ||||||
|                         type: 'float32' |  | ||||||
|                     }; |  | ||||||
|                 } else { |  | ||||||
|                     // Default for other Voyage models |  | ||||||
|                     return { |  | ||||||
|                         dimension: dimension || 1024, |  | ||||||
|                         contextWidth: 8192, |  | ||||||
|                         name: modelName, |  | ||||||
|                         type: 'float32' |  | ||||||
|                     }; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.info(`Could not fetch model info from Voyage AI API: ${error.message}. Using defaults.`); |  | ||||||
|  |  | ||||||
|             // Use default parameters if everything else fails |  | ||||||
|             const defaultModelInfo: EmbeddingModelInfo = { |  | ||||||
|                 dimension: 1024, // Default for Voyage models |  | ||||||
|                 contextWidth: 8192, |  | ||||||
|                 name: modelName, |  | ||||||
|                 type: 'float32' |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             this.modelInfoCache.set(modelName, defaultModelInfo); |  | ||||||
|             this.config.dimension = defaultModelInfo.dimension; |  | ||||||
|             return defaultModelInfo; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for a single text |  | ||||||
|      */ |  | ||||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { |  | ||||||
|         try { |  | ||||||
|             if (!text.trim()) { |  | ||||||
|                 return new Float32Array(this.config.dimension); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get model info to check context window |  | ||||||
|             const modelName = this.config.model || "voyage-2"; |  | ||||||
|             const modelInfo = await this.getModelInfo(modelName); |  | ||||||
|  |  | ||||||
|             // Trim text if it might exceed context window (rough character estimate) |  | ||||||
|             const charLimit = (modelInfo.contextWidth || PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS.default.contextWidth) * 4; // Rough estimate: avg 4 chars per token |  | ||||||
|             const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text; |  | ||||||
|  |  | ||||||
|             const response = await fetch(`${this.baseUrl}/embeddings`, { |  | ||||||
|                 method: 'POST', |  | ||||||
|                 headers: { |  | ||||||
|                     "Content-Type": "application/json", |  | ||||||
|                     "Authorization": `Bearer ${this.apiKey}` |  | ||||||
|                 }, |  | ||||||
|                 body: JSON.stringify({ |  | ||||||
|                     model: modelName, |  | ||||||
|                     input: trimmedText, |  | ||||||
|                     input_type: "text", |  | ||||||
|                     truncation: true |  | ||||||
|                 }) |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             if (!response.ok) { |  | ||||||
|                 const errorData = await response.json().catch(() => ({})); |  | ||||||
|                 throw new Error(errorData.error?.message || `HTTP error ${response.status}`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const data = await response.json(); |  | ||||||
|             if (data && data.data && data.data[0] && data.data[0].embedding) { |  | ||||||
|                 return new Float32Array(data.data[0].embedding); |  | ||||||
|             } else { |  | ||||||
|                 throw new Error("Unexpected response structure from Voyage AI API"); |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             const errorMessage = error.message || "Unknown error"; |  | ||||||
|             log.error(`Voyage AI embedding error: ${errorMessage}`); |  | ||||||
|             throw new Error(`Voyage AI embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * More specific implementation of batch size error detection for Voyage AI |  | ||||||
|      */ |  | ||||||
|     protected override isBatchSizeError(error: any): boolean { |  | ||||||
|         const errorMessage = error?.message || ''; |  | ||||||
|         const voyageBatchSizeErrorPatterns = [ |  | ||||||
|             'batch size', 'too many inputs', 'context length exceeded', |  | ||||||
|             'token limit', 'rate limit', 'limit exceeded', |  | ||||||
|             'too long', 'request too large', 'content too large' |  | ||||||
|         ]; |  | ||||||
|  |  | ||||||
|         return voyageBatchSizeErrorPatterns.some(pattern => |  | ||||||
|             errorMessage.toLowerCase().includes(pattern.toLowerCase()) |  | ||||||
|         ); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate embeddings for multiple texts in a single batch |  | ||||||
|      */ |  | ||||||
|     override async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         if (texts.length === 0) { |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await this.processWithAdaptiveBatch( |  | ||||||
|                 texts, |  | ||||||
|                 async (batch) => { |  | ||||||
|                     if (batch.length === 0) return []; |  | ||||||
|                     if (batch.length === 1) { |  | ||||||
|                         return [await this.generateEmbeddings(batch[0])]; |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // For Voyage AI, we can batch embeddings |  | ||||||
|                     const modelName = this.config.model || "voyage-2"; |  | ||||||
|  |  | ||||||
|                     // Filter out empty texts |  | ||||||
|                     const validBatch = batch.map(text => text.trim() || " "); |  | ||||||
|  |  | ||||||
|                     const response = await fetch(`${this.baseUrl}/embeddings`, { |  | ||||||
|                         method: 'POST', |  | ||||||
|                         headers: { |  | ||||||
|                             "Content-Type": "application/json", |  | ||||||
|                             "Authorization": `Bearer ${this.apiKey}` |  | ||||||
|                         }, |  | ||||||
|                         body: JSON.stringify({ |  | ||||||
|                             model: modelName, |  | ||||||
|                             input: validBatch, |  | ||||||
|                             input_type: "text", |  | ||||||
|                             truncation: true |  | ||||||
|                         }) |  | ||||||
|                     }); |  | ||||||
|  |  | ||||||
|                     if (!response.ok) { |  | ||||||
|                         const errorData = await response.json().catch(() => ({})); |  | ||||||
|                         throw new Error(errorData.error?.message || `HTTP error ${response.status}`); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     const data = await response.json(); |  | ||||||
|                     if (data && data.data && Array.isArray(data.data)) { |  | ||||||
|                         return data.data.map((item: any) => |  | ||||||
|                             new Float32Array(item.embedding || []) |  | ||||||
|                         ); |  | ||||||
|                     } else { |  | ||||||
|                         throw new Error("Unexpected response structure from Voyage AI batch API"); |  | ||||||
|                     } |  | ||||||
|                 }, |  | ||||||
|                 this.isBatchSizeError |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|         catch (error: any) { |  | ||||||
|             const errorMessage = error.message || "Unknown error"; |  | ||||||
|             log.error(`Voyage AI batch embedding error: ${errorMessage}`); |  | ||||||
|             throw new Error(`Voyage AI batch embedding error: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Returns the normalization status for Voyage embeddings |  | ||||||
|      * Voyage embeddings are generally normalized by the API |  | ||||||
|      */ |  | ||||||
|     override getNormalizationStatus(): NormalizationStatus { |  | ||||||
|         return NormalizationStatus.GUARANTEED; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,397 +0,0 @@ | |||||||
| import sql from "../../../services/sql.js"; |  | ||||||
| import dateUtils from "../../../services/date_utils.js"; |  | ||||||
| import log from "../../../services/log.js"; |  | ||||||
| import becca from "../../../becca/becca.js"; |  | ||||||
| import options from "../../../services/options.js"; |  | ||||||
| import { getEnabledEmbeddingProviders } from "../providers/providers.js"; |  | ||||||
| import { getNoteEmbeddingContext } from "./content_processing.js"; |  | ||||||
| import { deleteNoteEmbeddings } from "./storage.js"; |  | ||||||
| import type { QueueItem } from "./types.js"; |  | ||||||
| import { getChunkingOperations } from "./chunking/chunking_interface.js"; |  | ||||||
| import indexService from '../index_service.js'; |  | ||||||
| import { isNoteExcludedFromAIById } from "../utils/ai_exclusion_utils.js"; |  | ||||||
|  |  | ||||||
// Track which notes are currently being processed
// NOTE(review): this set is not referenced in the visible portion of the file;
// presumably used by processing code further down — verify before removing.
const notesInProcess = new Set<string>();

// Row shape returned by the failed-items query against embedding_queue.
interface FailedItemRow {
    noteId: string;
    operation: string;
    attempts: number;          // number of embedding attempts made so far
    lastAttempt: string;       // timestamp of the most recent attempt (string-comparable)
    error: string | null;      // last error message, if any
    failed: number;            // 1 when the note is permanently failed
}

// FailedItemRow enriched for presentation: note title (when the note still
// exists), a failure classification, and a permanence flag.
interface FailedItemWithTitle extends FailedItemRow {
    title?: string;                   // absent when the note no longer exists in becca
    failureType: 'chunks' | 'full';   // 'chunks' when the error message mentions "chunk"
    isPermanent: boolean;             // true when failed === 1
}
|  |  | ||||||
| /** |  | ||||||
|  * Queues a note for embedding update |  | ||||||
|  */ |  | ||||||
| export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE') { |  | ||||||
|     const now = dateUtils.localNowDateTime(); |  | ||||||
|     const utcNow = dateUtils.utcNowDateTime(); |  | ||||||
|  |  | ||||||
|     try { |  | ||||||
|         // Check if note is already in queue and whether it's marked as permanently failed |  | ||||||
|         const queueInfo = await sql.getRow( |  | ||||||
|             "SELECT 1 as exists_flag, failed, isProcessing FROM embedding_queue WHERE noteId = ?", |  | ||||||
|             [noteId] |  | ||||||
|         ) as {exists_flag: number, failed: number, isProcessing: number} | null; |  | ||||||
|  |  | ||||||
|         if (queueInfo) { |  | ||||||
|             // If the note is currently being processed, don't change its status |  | ||||||
|             if (queueInfo.isProcessing === 1) { |  | ||||||
|                 log.info(`Note ${noteId} is currently being processed, skipping queue update`); |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Only update if not permanently failed |  | ||||||
|             if (queueInfo.failed !== 1) { |  | ||||||
|                 // Update existing queue entry but preserve the failed status |  | ||||||
|                 await sql.execute(` |  | ||||||
|                     UPDATE embedding_queue |  | ||||||
|                     SET operation = ?, dateQueued = ?, utcDateQueued = ?, attempts = 0, error = NULL |  | ||||||
|                     WHERE noteId = ?`, |  | ||||||
|                     [operation, now, utcNow, noteId] |  | ||||||
|                 ); |  | ||||||
|             } else { |  | ||||||
|                 // Note is marked as permanently failed, don't update |  | ||||||
|                 log.info(`Note ${noteId} is marked as permanently failed, skipping automatic re-queue`); |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             // Add new queue entry |  | ||||||
|             await sql.execute(` |  | ||||||
|                 INSERT INTO embedding_queue |  | ||||||
|                 (noteId, operation, dateQueued, utcDateQueued, failed, isProcessing) |  | ||||||
|                 VALUES (?, ?, ?, ?, 0, 0)`, |  | ||||||
|                 [noteId, operation, now, utcNow] |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|     } catch (error: any) { |  | ||||||
|         // If there's a race condition where multiple events try to queue the same note simultaneously, |  | ||||||
|         // one of them will succeed and others will fail with UNIQUE constraint violation. |  | ||||||
|         // We can safely ignore this specific error since the note is already queued. |  | ||||||
|         if (error.code === 'SQLITE_CONSTRAINT_PRIMARYKEY' && error.message.includes('UNIQUE constraint failed: embedding_queue.noteId')) { |  | ||||||
|             log.info(`Note ${noteId} was already queued by another process, ignoring duplicate queue request`); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         // Rethrow any other errors |  | ||||||
|         throw error; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get notes that have failed embedding generation |  | ||||||
|  * |  | ||||||
|  * @param limit - Maximum number of failed notes to return |  | ||||||
|  * @returns List of failed notes with their error information |  | ||||||
|  */ |  | ||||||
| export async function getFailedEmbeddingNotes(limit: number = 100): Promise<any[]> { |  | ||||||
|     // Get notes with failed embedding attempts or permanently failed flag |  | ||||||
|     const failedQueueItems = sql.getRows<FailedItemRow>(` |  | ||||||
|         SELECT noteId, operation, attempts, lastAttempt, error, failed |  | ||||||
|         FROM embedding_queue |  | ||||||
|         WHERE attempts > 0 OR failed = 1 |  | ||||||
|         ORDER BY failed DESC, attempts DESC, lastAttempt DESC |  | ||||||
|         LIMIT ?`, |  | ||||||
|         [limit] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     // Add titles to the failed notes |  | ||||||
|     const failedNotesWithTitles: FailedItemWithTitle[] = []; |  | ||||||
|     for (const item of failedQueueItems) { |  | ||||||
|         const note = becca.getNote(item.noteId); |  | ||||||
|         if (note) { |  | ||||||
|             // Check if this is a chunking error (contains the word "chunks") |  | ||||||
|             const isChunkFailure = item.error && item.error.toLowerCase().includes('chunk'); |  | ||||||
|             const isPermanentFailure = item.failed === 1; |  | ||||||
|  |  | ||||||
|             failedNotesWithTitles.push({ |  | ||||||
|                 ...item, |  | ||||||
|                 title: note.title, |  | ||||||
|                 failureType: isChunkFailure ? 'chunks' : 'full', |  | ||||||
|                 isPermanent: isPermanentFailure |  | ||||||
|             }); |  | ||||||
|         } else { |  | ||||||
|             failedNotesWithTitles.push({ |  | ||||||
|                 ...item, |  | ||||||
|                 failureType: 'full', |  | ||||||
|                 isPermanent: item.failed === 1 |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Sort by latest attempt |  | ||||||
|     failedNotesWithTitles.sort((a, b) => { |  | ||||||
|         if (a.lastAttempt && b.lastAttempt) { |  | ||||||
|             return b.lastAttempt.localeCompare(a.lastAttempt); |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     // Limit to the specified number |  | ||||||
|     return failedNotesWithTitles.slice(0, limit); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Retry a specific failed note embedding |  | ||||||
|  */ |  | ||||||
| export async function retryFailedEmbedding(noteId: string): Promise<boolean> { |  | ||||||
|     const now = dateUtils.localNowDateTime(); |  | ||||||
|     const utcNow = dateUtils.utcNowDateTime(); |  | ||||||
|  |  | ||||||
|     // Check if the note is in the embedding queue and has failed or has attempts |  | ||||||
|     const existsInQueue = await sql.getValue( |  | ||||||
|         "SELECT 1 FROM embedding_queue WHERE noteId = ? AND (failed = 1 OR attempts > 0)", |  | ||||||
|         [noteId] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     if (existsInQueue) { |  | ||||||
|         // Reset the note in the queue |  | ||||||
|         await sql.execute(` |  | ||||||
|             UPDATE embedding_queue |  | ||||||
|             SET attempts = 0, error = NULL, failed = 0, dateQueued = ?, utcDateQueued = ?, priority = 10 |  | ||||||
|             WHERE noteId = ?`, |  | ||||||
|             [now, utcNow, noteId] |  | ||||||
|         ); |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Retry all failed embeddings |  | ||||||
|  * |  | ||||||
|  * @returns Number of notes queued for retry |  | ||||||
|  */ |  | ||||||
| export async function retryAllFailedEmbeddings(): Promise<number> { |  | ||||||
|     const now = dateUtils.localNowDateTime(); |  | ||||||
|     const utcNow = dateUtils.utcNowDateTime(); |  | ||||||
|  |  | ||||||
|     // Get count of all failed notes in queue (either with failed=1 or attempts>0) |  | ||||||
|     const failedCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM embedding_queue WHERE failed = 1 OR attempts > 0" |  | ||||||
|     ) as number; |  | ||||||
|  |  | ||||||
|     if (failedCount > 0) { |  | ||||||
|         // Reset all failed notes in the queue |  | ||||||
|         await sql.execute(` |  | ||||||
|             UPDATE embedding_queue |  | ||||||
|             SET attempts = 0, error = NULL, failed = 0, dateQueued = ?, utcDateQueued = ?, priority = 10 |  | ||||||
|             WHERE failed = 1 OR attempts > 0`, |  | ||||||
|             [now, utcNow] |  | ||||||
|         ); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return failedCount; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Process the embedding queue |  | ||||||
|  */ |  | ||||||
| export async function processEmbeddingQueue() { |  | ||||||
|     if (!(await options.getOptionBool('aiEnabled'))) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Check if this instance should process embeddings |  | ||||||
|     const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|     const isSyncServer = await indexService.isSyncServerForEmbeddings(); |  | ||||||
|     const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer; |  | ||||||
|  |  | ||||||
|     if (!shouldProcessEmbeddings) { |  | ||||||
|         // This instance is not configured to process embeddings |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const batchSize = parseInt(await options.getOption('embeddingBatchSize') || '10', 10); |  | ||||||
|     const enabledProviders = await getEnabledEmbeddingProviders(); |  | ||||||
|  |  | ||||||
|     if (enabledProviders.length === 0) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Get notes from queue (excluding failed ones and those being processed) |  | ||||||
|     const notes = await sql.getRows(` |  | ||||||
|         SELECT noteId, operation, attempts |  | ||||||
|         FROM embedding_queue |  | ||||||
|         WHERE failed = 0 AND isProcessing = 0 |  | ||||||
|         ORDER BY priority DESC, utcDateQueued ASC |  | ||||||
|         LIMIT ?`, |  | ||||||
|         [batchSize] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     if (notes.length === 0) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Track successfully processed notes count for progress reporting |  | ||||||
|     let processedCount = 0; |  | ||||||
|  |  | ||||||
|     for (const note of notes) { |  | ||||||
|         const noteData = note as unknown as QueueItem; |  | ||||||
|         const noteId = noteData.noteId; |  | ||||||
|  |  | ||||||
|         // Double-check that this note isn't already being processed |  | ||||||
|         if (notesInProcess.has(noteId)) { |  | ||||||
|             //log.info(`Note ${noteId} is already being processed by another thread, skipping`); |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Mark the note as being processed |  | ||||||
|             notesInProcess.add(noteId); |  | ||||||
|             await sql.execute( |  | ||||||
|                 "UPDATE embedding_queue SET isProcessing = 1 WHERE noteId = ?", |  | ||||||
|                 [noteId] |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             // Skip if note no longer exists |  | ||||||
|             if (!becca.getNote(noteId)) { |  | ||||||
|                 await sql.execute( |  | ||||||
|                     "DELETE FROM embedding_queue WHERE noteId = ?", |  | ||||||
|                     [noteId] |  | ||||||
|                 ); |  | ||||||
|                 await deleteNoteEmbeddings(noteId); |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check if this note is excluded from AI features |  | ||||||
|             if (isNoteExcludedFromAIById(noteId)) { |  | ||||||
|                 log.info(`Note ${noteId} excluded from AI features, removing from embedding queue`); |  | ||||||
|                 await sql.execute( |  | ||||||
|                     "DELETE FROM embedding_queue WHERE noteId = ?", |  | ||||||
|                     [noteId] |  | ||||||
|                 ); |  | ||||||
|                 await deleteNoteEmbeddings(noteId); // Also remove any existing embeddings |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (noteData.operation === 'DELETE') { |  | ||||||
|                 await deleteNoteEmbeddings(noteId); |  | ||||||
|                 await sql.execute( |  | ||||||
|                     "DELETE FROM embedding_queue WHERE noteId = ?", |  | ||||||
|                     [noteId] |  | ||||||
|                 ); |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|             // Get note context for embedding |  | ||||||
|             const context = await getNoteEmbeddingContext(noteId); |  | ||||||
|  |  | ||||||
|             // Check if we should use chunking for large content |  | ||||||
|             const useChunking = context.content.length > 5000; |  | ||||||
|  |  | ||||||
|             // Track provider successes and failures |  | ||||||
|             let allProvidersFailed = true; |  | ||||||
|             let allProvidersSucceeded = true; |  | ||||||
|  |  | ||||||
|             // Process with each enabled provider |  | ||||||
|             for (const provider of enabledProviders) { |  | ||||||
|                 try { |  | ||||||
|                     if (useChunking) { |  | ||||||
|                         // Process large notes using chunking |  | ||||||
|                         const chunkingOps = await getChunkingOperations(); |  | ||||||
|                         await chunkingOps.processNoteWithChunking(noteId, provider, context); |  | ||||||
|                         allProvidersFailed = false; |  | ||||||
|                     } else { |  | ||||||
|                         // Standard approach: Generate a single embedding for the whole note |  | ||||||
|                         const embedding = await provider.generateNoteEmbeddings(context); |  | ||||||
|  |  | ||||||
|                         // Store embedding |  | ||||||
|                         const config = provider.getConfig(); |  | ||||||
|                         await import('./storage.js').then(storage => { |  | ||||||
|                             return storage.storeNoteEmbedding( |  | ||||||
|                                 noteId, |  | ||||||
|                                 provider.name, |  | ||||||
|                                 config.model, |  | ||||||
|                                 embedding |  | ||||||
|                             ); |  | ||||||
|                         }); |  | ||||||
|  |  | ||||||
|                         // At least one provider succeeded |  | ||||||
|                         allProvidersFailed = false; |  | ||||||
|                     } |  | ||||||
|                 } catch (providerError: any) { |  | ||||||
|                     // This provider failed |  | ||||||
|                     allProvidersSucceeded = false; |  | ||||||
|                     log.error(`Error generating embedding with provider ${provider.name} for note ${noteId}: ${providerError.message || 'Unknown error'}`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (!allProvidersFailed) { |  | ||||||
|                 // At least one provider succeeded, remove from queue |  | ||||||
|                 await sql.execute( |  | ||||||
|                     "DELETE FROM embedding_queue WHERE noteId = ?", |  | ||||||
|                     [noteId] |  | ||||||
|                 ); |  | ||||||
|  |  | ||||||
|                 // Count as successfully processed |  | ||||||
|                 processedCount++; |  | ||||||
|             } else { |  | ||||||
|                 // If all providers failed, mark as failed but keep in queue |  | ||||||
|                 await sql.execute(` |  | ||||||
|                     UPDATE embedding_queue |  | ||||||
|                     SET attempts = attempts + 1, |  | ||||||
|                         lastAttempt = ?, |  | ||||||
|                         error = ?, |  | ||||||
|                         isProcessing = 0 |  | ||||||
|                     WHERE noteId = ?`, |  | ||||||
|                     [dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteId] |  | ||||||
|                 ); |  | ||||||
|  |  | ||||||
|                 // Mark as permanently failed if too many attempts |  | ||||||
|                 if (noteData.attempts + 1 >= 3) { |  | ||||||
|                     log.error(`Marked note ${noteId} as permanently failed after multiple embedding attempts`); |  | ||||||
|  |  | ||||||
|                     // Set the failed flag but keep the actual attempts count |  | ||||||
|                     await sql.execute(` |  | ||||||
|                         UPDATE embedding_queue |  | ||||||
|                         SET failed = 1 |  | ||||||
|                         WHERE noteId = ? |  | ||||||
|                     `, [noteId]); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             // Update attempt count and log error |  | ||||||
|             await sql.execute(` |  | ||||||
|                 UPDATE embedding_queue |  | ||||||
|                 SET attempts = attempts + 1, |  | ||||||
|                     lastAttempt = ?, |  | ||||||
|                     error = ?, |  | ||||||
|                     isProcessing = 0 |  | ||||||
|                 WHERE noteId = ?`, |  | ||||||
|                 [dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteId] |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             log.error(`Error processing embedding for note ${noteId}: ${error.message || 'Unknown error'}`); |  | ||||||
|  |  | ||||||
|             // Mark as permanently failed if too many attempts |  | ||||||
|             if (noteData.attempts + 1 >= 3) { |  | ||||||
|                 log.error(`Marked note ${noteId} as permanently failed after multiple embedding attempts`); |  | ||||||
|  |  | ||||||
|                 // Set the failed flag but keep the actual attempts count |  | ||||||
|                 await sql.execute(` |  | ||||||
|                     UPDATE embedding_queue |  | ||||||
|                     SET failed = 1 |  | ||||||
|                     WHERE noteId = ? |  | ||||||
|                 `, [noteId]); |  | ||||||
|             } |  | ||||||
|         } finally { |  | ||||||
|             // Always clean up the processing status in the in-memory set |  | ||||||
|             notesInProcess.delete(noteId); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Update the index rebuild progress if any notes were processed |  | ||||||
|     if (processedCount > 0) { |  | ||||||
|         indexService.updateIndexRebuildProgress(processedCount); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,64 +0,0 @@ | |||||||
| import sql from "../../../services/sql.js"; |  | ||||||
| import log from "../../../services/log.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get current embedding statistics |  | ||||||
|  */ |  | ||||||
| export async function getEmbeddingStats() { |  | ||||||
|     const totalNotesCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM notes WHERE isDeleted = 0" |  | ||||||
|     ) as number; |  | ||||||
|  |  | ||||||
|     const embeddedNotesCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(DISTINCT noteId) FROM note_embeddings" |  | ||||||
|     ) as number; |  | ||||||
|  |  | ||||||
|     const queuedNotesCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM embedding_queue WHERE failed = 0" |  | ||||||
|     ) as number; |  | ||||||
|  |  | ||||||
|     const failedNotesCount = await sql.getValue( |  | ||||||
|         "SELECT COUNT(*) FROM embedding_queue WHERE failed = 1" |  | ||||||
|     ) as number; |  | ||||||
|  |  | ||||||
|     // Get the last processing time by checking the most recent embedding |  | ||||||
|     const lastProcessedDate = await sql.getValue( |  | ||||||
|         "SELECT utcDateCreated FROM note_embeddings ORDER BY utcDateCreated DESC LIMIT 1" |  | ||||||
|     ) as string | null || null; |  | ||||||
|  |  | ||||||
|     // Calculate the actual completion percentage |  | ||||||
|     // When reprocessing, we need to consider notes in the queue as not completed yet |  | ||||||
|     // We calculate the percentage of notes that are embedded and NOT in the queue |  | ||||||
|  |  | ||||||
|     // First, get the count of notes that are both in the embeddings table and queue |  | ||||||
|     const notesInQueueWithEmbeddings = await sql.getValue(` |  | ||||||
|         SELECT COUNT(DISTINCT eq.noteId) |  | ||||||
|         FROM embedding_queue eq |  | ||||||
|         JOIN note_embeddings ne ON eq.noteId = ne.noteId |  | ||||||
|     `) as number; |  | ||||||
|  |  | ||||||
|     // The number of notes with valid, up-to-date embeddings |  | ||||||
|     const upToDateEmbeddings = embeddedNotesCount - notesInQueueWithEmbeddings; |  | ||||||
|  |  | ||||||
|     // Calculate the percentage of notes that are properly embedded |  | ||||||
|     const percentComplete = totalNotesCount > 0 |  | ||||||
|         ? Math.round((upToDateEmbeddings / (totalNotesCount - failedNotesCount)) * 100) |  | ||||||
|         : 0; |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         totalNotesCount, |  | ||||||
|         embeddedNotesCount, |  | ||||||
|         queuedNotesCount, |  | ||||||
|         failedNotesCount, |  | ||||||
|         lastProcessedDate, |  | ||||||
|         percentComplete: Math.max(0, Math.min(100, percentComplete)) // Ensure between 0-100 |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Cleanup function to remove stale or unused embeddings |  | ||||||
|  */ |  | ||||||
| export function cleanupEmbeddings() { |  | ||||||
|     // Implementation can be added later when needed |  | ||||||
|     // For example, removing embeddings for deleted notes, etc. |  | ||||||
| } |  | ||||||
| @@ -1,544 +0,0 @@ | |||||||
| import sql from '../../sql.js' |  | ||||||
| import { randomString } from "../../../services/utils.js"; |  | ||||||
| import dateUtils from "../../../services/date_utils.js"; |  | ||||||
| import log from "../../log.js"; |  | ||||||
| import { embeddingToBuffer, bufferToEmbedding, cosineSimilarity, enhancedCosineSimilarity, selectOptimalEmbedding, adaptEmbeddingDimensions } from "./vector_utils.js"; |  | ||||||
| import type { EmbeddingResult } from "./types.js"; |  | ||||||
| import entityChangesService from "../../../services/entity_changes.js"; |  | ||||||
| import type { EntityChange } from "../../../services/entity_changes_interface.js"; |  | ||||||
| import { EMBEDDING_CONSTANTS } from "../constants/embedding_constants.js"; |  | ||||||
| import { SEARCH_CONSTANTS } from '../constants/search_constants.js'; |  | ||||||
| import type { NoteEmbeddingContext } from "./embeddings_interface.js"; |  | ||||||
| import becca from "../../../becca/becca.js"; |  | ||||||
| import { isNoteExcludedFromAIById } from "../utils/ai_exclusion_utils.js"; |  | ||||||
| import { getSelectedEmbeddingProvider } from '../config/configuration_helpers.js'; |  | ||||||
|  |  | ||||||
// A single similarity-search hit for a note.
interface Similarity {
    noteId: string;        // ID of the matched note
    similarity: number;    // similarity score (higher = more similar)
    contentType: string;   // content classification of the matched note
    bonuses?: Record<string, number>; // Optional for debugging
}
|  |  | ||||||
| /** |  | ||||||
|  * Creates or updates an embedding for a note |  | ||||||
|  */ |  | ||||||
| export async function storeNoteEmbedding( |  | ||||||
|     noteId: string, |  | ||||||
|     providerId: string, |  | ||||||
|     modelId: string, |  | ||||||
|     embedding: Float32Array |  | ||||||
| ): Promise<string> { |  | ||||||
|     const dimension = embedding.length; |  | ||||||
|     const embeddingBlob = embeddingToBuffer(embedding); |  | ||||||
|     const now = dateUtils.localNowDateTime(); |  | ||||||
|     const utcNow = dateUtils.utcNowDateTime(); |  | ||||||
|  |  | ||||||
|     // Check if an embedding already exists for this note and provider/model |  | ||||||
|     const existingEmbed = await getEmbeddingForNote(noteId, providerId, modelId); |  | ||||||
|     let embedId; |  | ||||||
|  |  | ||||||
|     if (existingEmbed) { |  | ||||||
|         // Update existing embedding |  | ||||||
|         embedId = existingEmbed.embedId; |  | ||||||
|         await sql.execute(` |  | ||||||
|             UPDATE note_embeddings |  | ||||||
|             SET embedding = ?, dimension = ?, version = version + 1, |  | ||||||
|                 dateModified = ?, utcDateModified = ? |  | ||||||
|             WHERE embedId = ?`, |  | ||||||
|             [embeddingBlob, dimension, now, utcNow, embedId] |  | ||||||
|         ); |  | ||||||
|     } else { |  | ||||||
|         // Create new embedding |  | ||||||
|         embedId = randomString(16); |  | ||||||
|         await sql.execute(` |  | ||||||
|             INSERT INTO note_embeddings |  | ||||||
|             (embedId, noteId, providerId, modelId, dimension, embedding, |  | ||||||
|              dateCreated, utcDateCreated, dateModified, utcDateModified) |  | ||||||
|             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, |  | ||||||
|             [embedId, noteId, providerId, modelId, dimension, embeddingBlob, |  | ||||||
|              now, utcNow, now, utcNow] |  | ||||||
|         ); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Create entity change record for syncing |  | ||||||
|     interface EmbeddingRow { |  | ||||||
|         embedId: string; |  | ||||||
|         noteId: string; |  | ||||||
|         providerId: string; |  | ||||||
|         modelId: string; |  | ||||||
|         dimension: number; |  | ||||||
|         version: number; |  | ||||||
|         dateCreated: string; |  | ||||||
|         utcDateCreated: string; |  | ||||||
|         dateModified: string; |  | ||||||
|         utcDateModified: string; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const row = await sql.getRow<EmbeddingRow>(` |  | ||||||
|         SELECT embedId, noteId, providerId, modelId, dimension, version, |  | ||||||
|                dateCreated, utcDateCreated, dateModified, utcDateModified |  | ||||||
|         FROM note_embeddings |  | ||||||
|         WHERE embedId = ?`, |  | ||||||
|         [embedId] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     if (row) { |  | ||||||
|         // Skip the actual embedding data for the hash since it's large |  | ||||||
|         const ec: EntityChange = { |  | ||||||
|             entityName: "note_embeddings", |  | ||||||
|             entityId: embedId, |  | ||||||
|             hash: `${row.noteId}|${row.providerId}|${row.modelId}|${row.dimension}|${row.version}|${row.utcDateModified}`, |  | ||||||
|             utcDateChanged: row.utcDateModified, |  | ||||||
|             isSynced: true, |  | ||||||
|             isErased: false |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         entityChangesService.putEntityChange(ec); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return embedId; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Retrieves embedding for a specific note |  | ||||||
|  */ |  | ||||||
| export async function getEmbeddingForNote(noteId: string, providerId: string, modelId: string): Promise<EmbeddingResult | null> { |  | ||||||
|     const row = await sql.getRow(` |  | ||||||
|         SELECT embedId, noteId, providerId, modelId, dimension, embedding, version, |  | ||||||
|                dateCreated, utcDateCreated, dateModified, utcDateModified |  | ||||||
|         FROM note_embeddings |  | ||||||
|         WHERE noteId = ? AND providerId = ? AND modelId = ?`, |  | ||||||
|         [noteId, providerId, modelId] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     if (!row) { |  | ||||||
|         return null; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Need to cast row to any as it doesn't have type information |  | ||||||
|     const rowData = row as any; |  | ||||||
|  |  | ||||||
|     return { |  | ||||||
|         ...rowData, |  | ||||||
|         embedding: bufferToEmbedding(rowData.embedding, rowData.dimension) |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
// Create an interface that represents the embedding row from the database,
// including the note columns joined in by the similarity queries below.
interface EmbeddingRow {
    embedId: string;      // primary key of the embedding row
    noteId: string;       // note the embedding belongs to
    providerId: string;   // provider that generated the embedding
    modelId: string;      // model that generated the embedding
    dimension: number;    // vector dimension of the stored embedding
    embedding: Buffer;    // raw embedding BLOB (decode with bufferToEmbedding)
    title?: string;       // joined from notes
    type?: string;        // joined from notes
    mime?: string;        // joined from notes
    isDeleted?: number;   // joined from notes (0/1)
}

// Interface for enhanced embedding with query model information,
// used for cross-model comparison during similarity search.
interface EnhancedEmbeddingRow extends EmbeddingRow {
    queryProviderId: string; // provider the query embedding came from
    queryModelId: string;    // model the query embedding came from
}
|  |  | ||||||
| /** |  | ||||||
|  * Finds similar notes based on vector similarity |  | ||||||
|  */ |  | ||||||
| export async function findSimilarNotes( |  | ||||||
|     embedding: Float32Array, |  | ||||||
|     providerId: string, |  | ||||||
|     modelId: string, |  | ||||||
|     limit = SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS, |  | ||||||
|     threshold?: number,  // Made optional to use constants |  | ||||||
|     useFallback = true   // Whether to try other providers if no embeddings found |  | ||||||
| ): Promise<{noteId: string, similarity: number, contentType?: string}[]> { |  | ||||||
|     // Import constants dynamically to avoid circular dependencies |  | ||||||
|     const llmModule = await import('../../../routes/api/llm.js'); |  | ||||||
|     // Use default threshold if not provided |  | ||||||
|     const actualThreshold = threshold || SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD; |  | ||||||
|  |  | ||||||
|     try { |  | ||||||
|         log.info(`Finding similar notes with provider: ${providerId}, model: ${modelId}, dimension: ${embedding.length}, threshold: ${actualThreshold}`); |  | ||||||
|  |  | ||||||
|         // First try to find embeddings for the exact provider and model |  | ||||||
|         const embeddings = await sql.getRows(` |  | ||||||
|             SELECT ne.embedId, ne.noteId, ne.providerId, ne.modelId, ne.dimension, ne.embedding, |  | ||||||
|                  n.isDeleted, n.title, n.type, n.mime |  | ||||||
|             FROM note_embeddings ne |  | ||||||
|             JOIN notes n ON ne.noteId = n.noteId |  | ||||||
|             WHERE ne.providerId = ? AND ne.modelId = ? AND n.isDeleted = 0 |  | ||||||
|         `, [providerId, modelId]) as EmbeddingRow[]; |  | ||||||
|  |  | ||||||
|         if (embeddings && embeddings.length > 0) { |  | ||||||
|             log.info(`Found ${embeddings.length} embeddings for provider ${providerId}, model ${modelId}`); |  | ||||||
|  |  | ||||||
|             // Add query model information to each embedding for cross-model comparison |  | ||||||
|             const enhancedEmbeddings: EnhancedEmbeddingRow[] = embeddings.map(e => { |  | ||||||
|                 return { |  | ||||||
|                     embedId: e.embedId, |  | ||||||
|                     noteId: e.noteId, |  | ||||||
|                     providerId: e.providerId, |  | ||||||
|                     modelId: e.modelId, |  | ||||||
|                     dimension: e.dimension, |  | ||||||
|                     embedding: e.embedding, |  | ||||||
|                     title: e.title, |  | ||||||
|                     type: e.type, |  | ||||||
|                     mime: e.mime, |  | ||||||
|                     isDeleted: e.isDeleted, |  | ||||||
|                     queryProviderId: providerId, |  | ||||||
|                     queryModelId: modelId |  | ||||||
|                 }; |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             return await processEmbeddings(embedding, enhancedEmbeddings, actualThreshold, limit); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If no embeddings found and fallback is allowed, try other providers |  | ||||||
|         if (useFallback) { |  | ||||||
|             log.info(`No embeddings found for ${providerId}/${modelId}, trying fallback providers`); |  | ||||||
|  |  | ||||||
|             // Define the type for embedding metadata |  | ||||||
|             interface EmbeddingMetadata { |  | ||||||
|                 providerId: string; |  | ||||||
|                 modelId: string; |  | ||||||
|                 count: number; |  | ||||||
|                 dimension: number; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get all available embedding metadata |  | ||||||
|             const availableEmbeddings = await sql.getRows(` |  | ||||||
|                 SELECT DISTINCT providerId, modelId, COUNT(*) as count, dimension |  | ||||||
|                 FROM note_embeddings |  | ||||||
|                 GROUP BY providerId, modelId |  | ||||||
|                 ORDER BY dimension DESC, count DESC |  | ||||||
|             `) as EmbeddingMetadata[]; |  | ||||||
|  |  | ||||||
|             if (availableEmbeddings.length > 0) { |  | ||||||
|                 log.info(`Available embeddings: ${JSON.stringify(availableEmbeddings.map(e => ({ |  | ||||||
|                     providerId: e.providerId, |  | ||||||
|                     modelId: e.modelId, |  | ||||||
|                     count: e.count, |  | ||||||
|                     dimension: e.dimension |  | ||||||
|                 })))}`); |  | ||||||
|  |  | ||||||
|                 // Import the vector utils |  | ||||||
|                 const { selectOptimalEmbedding } = await import('./vector_utils.js'); |  | ||||||
|  |  | ||||||
|                 // Get user dimension strategy preference |  | ||||||
|                 const options = (await import('../../options.js')).default; |  | ||||||
|                 const dimensionStrategy = await options.getOption('embeddingDimensionStrategy') || 'native'; |  | ||||||
|                 log.info(`Using embedding dimension strategy: ${dimensionStrategy}`); |  | ||||||
|  |  | ||||||
|                 // Find the best alternative based on highest dimension for 'native' strategy |  | ||||||
|                 if (dimensionStrategy === 'native') { |  | ||||||
|                     const bestAlternative = selectOptimalEmbedding(availableEmbeddings); |  | ||||||
|  |  | ||||||
|                     if (bestAlternative) { |  | ||||||
|                         log.info(`Using highest-dimension fallback: ${bestAlternative.providerId}/${bestAlternative.modelId} (${bestAlternative.dimension}D)`); |  | ||||||
|  |  | ||||||
|                         // Get embeddings for this provider/model |  | ||||||
|                         const alternativeEmbeddings = await sql.getRows(` |  | ||||||
|                             SELECT ne.embedId, ne.noteId, ne.providerId, ne.modelId, ne.dimension, ne.embedding, |  | ||||||
|                                 n.isDeleted, n.title, n.type, n.mime |  | ||||||
|                             FROM note_embeddings ne |  | ||||||
|                             JOIN notes n ON ne.noteId = n.noteId |  | ||||||
|                             WHERE ne.providerId = ? AND ne.modelId = ? AND n.isDeleted = 0 |  | ||||||
|                         `, [bestAlternative.providerId, bestAlternative.modelId]) as EmbeddingRow[]; |  | ||||||
|  |  | ||||||
|                         if (alternativeEmbeddings && alternativeEmbeddings.length > 0) { |  | ||||||
|                             // Add query model information to each embedding for cross-model comparison |  | ||||||
|                             const enhancedEmbeddings: EnhancedEmbeddingRow[] = alternativeEmbeddings.map(e => { |  | ||||||
|                                 return { |  | ||||||
|                                     embedId: e.embedId, |  | ||||||
|                                     noteId: e.noteId, |  | ||||||
|                                     providerId: e.providerId, |  | ||||||
|                                     modelId: e.modelId, |  | ||||||
|                                     dimension: e.dimension, |  | ||||||
|                                     embedding: e.embedding, |  | ||||||
|                                     title: e.title, |  | ||||||
|                                     type: e.type, |  | ||||||
|                                     mime: e.mime, |  | ||||||
|                                     isDeleted: e.isDeleted, |  | ||||||
|                                     queryProviderId: providerId, |  | ||||||
|                                     queryModelId: modelId |  | ||||||
|                                 }; |  | ||||||
|                             }); |  | ||||||
|  |  | ||||||
|                             return await processEmbeddings(embedding, enhancedEmbeddings, actualThreshold, limit); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } else { |  | ||||||
|                     // Try providers using the new configuration system |  | ||||||
|                     if (useFallback) { |  | ||||||
|                         log.info('No embeddings found for specified provider, trying fallback providers...'); |  | ||||||
|  |  | ||||||
|                         // Use the new configuration system - no string parsing! |  | ||||||
|                         const selectedProvider = await getSelectedEmbeddingProvider(); |  | ||||||
|                         const preferredProviders = selectedProvider ? [selectedProvider] : []; |  | ||||||
|  |  | ||||||
|                         log.info(`Using selected provider: ${selectedProvider || 'none'}`); |  | ||||||
|  |  | ||||||
|                         // Try providers in precedence order |  | ||||||
|                         for (const provider of preferredProviders) { |  | ||||||
|                             const providerEmbeddings = availableEmbeddings.filter(e => e.providerId === provider); |  | ||||||
|  |  | ||||||
|                             if (providerEmbeddings.length > 0) { |  | ||||||
|                                 // Choose the model with the most embeddings |  | ||||||
|                                 const bestModel = providerEmbeddings.sort((a, b) => b.count - a.count)[0]; |  | ||||||
|                                 log.info(`Found fallback provider: ${provider}, model: ${bestModel.modelId}, dimension: ${bestModel.dimension}`); |  | ||||||
|  |  | ||||||
|                                 // The 'regenerate' strategy would go here if needed |  | ||||||
|                                 // We're no longer supporting the 'adapt' strategy |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             log.info('No suitable fallback embeddings found, returning empty results'); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return []; |  | ||||||
|     } catch (error) { |  | ||||||
|         log.error(`Error finding similar notes: ${error}`); |  | ||||||
|         return []; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Helper function to process embeddings and calculate similarities |  | ||||||
| async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[], threshold: number, limit: number) { |  | ||||||
|     const { |  | ||||||
|         enhancedCosineSimilarity, |  | ||||||
|         bufferToEmbedding, |  | ||||||
|         ContentType, |  | ||||||
|         PerformanceProfile, |  | ||||||
|         detectContentType, |  | ||||||
|         vectorDebugConfig |  | ||||||
|     } = await import('./vector_utils.js'); |  | ||||||
|  |  | ||||||
|     // Store original debug settings but keep debug disabled |  | ||||||
|     const originalDebugEnabled = vectorDebugConfig.enabled; |  | ||||||
|     const originalLogLevel = vectorDebugConfig.logLevel; |  | ||||||
|  |  | ||||||
|     // Keep debug disabled for normal operation |  | ||||||
|     vectorDebugConfig.enabled = false; |  | ||||||
|     vectorDebugConfig.recordStats = false; |  | ||||||
|  |  | ||||||
|     const options = (await import('../../options.js')).default; |  | ||||||
|  |  | ||||||
|     // Define weighting factors with defaults that can be overridden by settings |  | ||||||
|     interface SimilarityWeights { |  | ||||||
|         exactTitleMatch: number; |  | ||||||
|         titleContainsQuery: number; |  | ||||||
|         partialTitleMatch: number; |  | ||||||
|         // Add more weights as needed - examples: |  | ||||||
|         sameType?: number; |  | ||||||
|         attributeMatch?: number; |  | ||||||
|         recentlyCreated?: number; |  | ||||||
|         recentlyModified?: number; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Default weights that match our previous hardcoded values |  | ||||||
|     const defaultWeights: SimilarityWeights = { |  | ||||||
|         exactTitleMatch: 0.3, |  | ||||||
|         titleContainsQuery: 0.2, |  | ||||||
|         partialTitleMatch: 0.1, |  | ||||||
|         sameType: 0.05, |  | ||||||
|         attributeMatch: 0.05, |  | ||||||
|         recentlyCreated: 0.05, |  | ||||||
|         recentlyModified: 0.05 |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     // Get weights from options if they exist |  | ||||||
|     const weights: SimilarityWeights = { ...defaultWeights }; |  | ||||||
|     try { |  | ||||||
|         const customWeightsJSON = EMBEDDING_CONSTANTS; |  | ||||||
|         if (customWeightsJSON) { |  | ||||||
|             try { |  | ||||||
|                 const customWeights = EMBEDDING_CONSTANTS; |  | ||||||
|                 // Override defaults with any custom weights |  | ||||||
|                 Object.assign(weights, customWeights); |  | ||||||
|                 log.info(`Using custom similarity weights: ${JSON.stringify(weights)}`); |  | ||||||
|             } catch (e) { |  | ||||||
|                 log.error(`Error parsing custom similarity weights: ${e}`); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } catch (e) { |  | ||||||
|         // Use defaults if no custom weights |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Calculate similarity bonuses based on various factors |  | ||||||
|      */ |  | ||||||
|     function calculateSimilarityBonuses( |  | ||||||
|         embedding: any, |  | ||||||
|         note: any, |  | ||||||
|         queryText: string, |  | ||||||
|         weights: SimilarityWeights |  | ||||||
|     ): { bonuses: Record<string, number>, totalBonus: number } { |  | ||||||
|         const bonuses: Record<string, number> = {}; |  | ||||||
|  |  | ||||||
|         // Skip if we don't have query text |  | ||||||
|         if (!queryText || !note.title) { |  | ||||||
|             return { bonuses, totalBonus: 0 }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const titleLower = note.title.toLowerCase(); |  | ||||||
|         const queryLower = queryText.toLowerCase(); |  | ||||||
|  |  | ||||||
|         // 1. Exact title match |  | ||||||
|         if (titleLower === queryLower) { |  | ||||||
|             bonuses.exactTitleMatch = weights.exactTitleMatch; |  | ||||||
|         } |  | ||||||
|         // 2. Title contains the entire query |  | ||||||
|         else if (titleLower.includes(queryLower)) { |  | ||||||
|             bonuses.titleContainsQuery = weights.titleContainsQuery; |  | ||||||
|         } |  | ||||||
|         // 3. Partial term matching |  | ||||||
|         else { |  | ||||||
|             // Split query into terms and check if title contains them |  | ||||||
|             const queryTerms = queryLower.split(/\s+/).filter((term: string) => term.length > 2); |  | ||||||
|             let matchCount = 0; |  | ||||||
|  |  | ||||||
|             for (const term of queryTerms) { |  | ||||||
|                 if (titleLower.includes(term)) { |  | ||||||
|                     matchCount++; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (matchCount > 0 && queryTerms.length > 0) { |  | ||||||
|                 // Calculate proportion of matching terms and apply a scaled bonus |  | ||||||
|                 const matchProportion = matchCount / queryTerms.length; |  | ||||||
|                 bonuses.partialTitleMatch = weights.partialTitleMatch * matchProportion; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // 4. Add more factors as needed here |  | ||||||
|         // Example: Same note type bonus |  | ||||||
|         // if (note.type && weights.sameType) { |  | ||||||
|         //     // Note: This would need to be compared with the query context to be meaningful |  | ||||||
|         //     // For now, this is a placeholder for demonstration |  | ||||||
|         //     bonuses.sameType = weights.sameType; |  | ||||||
|         // } |  | ||||||
|  |  | ||||||
|         // Calculate total bonus |  | ||||||
|         const totalBonus = Object.values(bonuses).reduce((sum, bonus) => sum + bonus, 0); |  | ||||||
|  |  | ||||||
|         return { bonuses, totalBonus }; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const similarities: Similarity[] = []; |  | ||||||
|  |  | ||||||
|     try { |  | ||||||
|         // Try to extract the original query text if it was added to the metadata |  | ||||||
|         // This will help us determine title matches |  | ||||||
|         const queryText = queryEmbedding.hasOwnProperty('originalQuery') |  | ||||||
|             ? (queryEmbedding as any).originalQuery |  | ||||||
|             : ''; |  | ||||||
|  |  | ||||||
|         for (const e of embeddings) { |  | ||||||
|             // Check if this note is excluded from AI features |  | ||||||
|             if (isNoteExcludedFromAIById(e.noteId)) { |  | ||||||
|                 continue; // Skip this note if it has the AI exclusion label |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const embVector = bufferToEmbedding(e.embedding, e.dimension); |  | ||||||
|  |  | ||||||
|             // Detect content type from mime type if available |  | ||||||
|             let contentType = ContentType.GENERAL_TEXT; |  | ||||||
|             if (e.mime) { |  | ||||||
|                 contentType = detectContentType(e.mime); |  | ||||||
|                 // Debug logging removed to avoid console spam |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Select performance profile based on embedding size and use case |  | ||||||
|             // For most similarity searches, BALANCED is a good default |  | ||||||
|             const performanceProfile = PerformanceProfile.BALANCED; |  | ||||||
|  |  | ||||||
|             // Determine if this is cross-model comparison |  | ||||||
|             const isCrossModel = e.providerId !== e.queryProviderId || e.modelId !== e.queryModelId; |  | ||||||
|  |  | ||||||
|             // Calculate similarity with content-aware parameters |  | ||||||
|             let similarity = enhancedCosineSimilarity( |  | ||||||
|                 queryEmbedding, |  | ||||||
|                 embVector, |  | ||||||
|                 true, // normalize vectors to ensure consistent comparison |  | ||||||
|                 e.queryModelId,  // source model ID |  | ||||||
|                 e.modelId,       // target model ID |  | ||||||
|                 contentType,     // content-specific padding strategy |  | ||||||
|                 performanceProfile |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             // Calculate and apply similarity bonuses |  | ||||||
|             const { bonuses, totalBonus } = calculateSimilarityBonuses( |  | ||||||
|                 queryEmbedding, |  | ||||||
|                 e, |  | ||||||
|                 queryText, |  | ||||||
|                 weights |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             if (totalBonus > 0) { |  | ||||||
|                 similarity += totalBonus; |  | ||||||
|  |  | ||||||
|                 // Log significant bonuses for debugging |  | ||||||
|                 const significantBonuses = Object.entries(bonuses) |  | ||||||
|                     .filter(([_, value]) => value >= 0.05) |  | ||||||
|                     .map(([key, value]) => `${key}: +${value.toFixed(2)}`) |  | ||||||
|                     .join(', '); |  | ||||||
|  |  | ||||||
|                 if (significantBonuses) { |  | ||||||
|                     log.info(`Added bonuses for note "${e.title}" (${e.noteId}): ${significantBonuses}`); |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Cap similarity at 1.0 to maintain expected range |  | ||||||
|                 similarity = Math.min(similarity, 1.0); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (similarity >= threshold) { |  | ||||||
|                 similarities.push({ |  | ||||||
|                     noteId: e.noteId, |  | ||||||
|                     similarity: similarity, |  | ||||||
|                     contentType: contentType.toString(), |  | ||||||
|                     // Optionally include bonuses for debugging/analysis |  | ||||||
|                     // bonuses: bonuses |  | ||||||
|                 }); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return similarities |  | ||||||
|             .sort((a, b) => b.similarity - a.similarity) |  | ||||||
|             .slice(0, limit); |  | ||||||
|     } finally { |  | ||||||
|         // Restore original debug settings |  | ||||||
|         vectorDebugConfig.enabled = originalDebugEnabled; |  | ||||||
|         vectorDebugConfig.logLevel = originalLogLevel; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Delete embeddings for a note |  | ||||||
|  * |  | ||||||
|  * @param noteId - The ID of the note |  | ||||||
|  * @param providerId - Optional provider ID to delete embeddings only for a specific provider |  | ||||||
|  * @param modelId - Optional model ID to delete embeddings only for a specific model |  | ||||||
|  */ |  | ||||||
| export async function deleteNoteEmbeddings(noteId: string, providerId?: string, modelId?: string) { |  | ||||||
|     let query = "DELETE FROM note_embeddings WHERE noteId = ?"; |  | ||||||
|     const params: any[] = [noteId]; |  | ||||||
|  |  | ||||||
|     if (providerId) { |  | ||||||
|         query += " AND providerId = ?"; |  | ||||||
|         params.push(providerId); |  | ||||||
|  |  | ||||||
|         if (modelId) { |  | ||||||
|             query += " AND modelId = ?"; |  | ||||||
|             params.push(modelId); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     await sql.execute(query, params); |  | ||||||
| } |  | ||||||
| @@ -1,29 +0,0 @@ | |||||||
| import type { NoteEmbeddingContext } from "./embeddings_interface.js"; |  | ||||||
|  |  | ||||||
/**
 * Type definition for embedding result.
 *
 * Field names match the columns selected from the note_embeddings table
 * elsewhere in this module (embedId, noteId, providerId, modelId, dimension,
 * embedding), so this appears to mirror one stored embedding row.
 */
export interface EmbeddingResult {
    embedId: string;         // unique ID of the stored embedding row
    noteId: string;          // note this embedding was generated from
    providerId: string;      // embedding provider that produced the vector
    modelId: string;         // model (within the provider) that produced it
    dimension: number;       // number of components in `embedding`
    embedding: Float32Array; // the embedding vector itself
    version: number;         // NOTE(review): presumably an embedding format/version counter — confirm
    dateCreated: string;     // local-time creation timestamp
    utcDateCreated: string;  // UTC creation timestamp
    dateModified: string;    // local-time modification timestamp
    utcDateModified: string; // UTC modification timestamp
}
|  |  | ||||||
/**
 * Type for queue item — presumably an entry in the embedding processing
 * queue; verify against the queue producer.
 */
export interface QueueItem {
    noteId: string;    // note the queued operation targets
    operation: string; // NOTE(review): operation kind (e.g. create/update/delete?) — confirm
    attempts: number;  // how many times processing has been attempted so far
}
|  |  | ||||||
| export type { NoteEmbeddingContext }; |  | ||||||
| @@ -1,886 +0,0 @@ | |||||||
| import { SEARCH_CONSTANTS } from '../constants/search_constants.js'; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Computes the cosine similarity between two vectors |  | ||||||
|  * If dimensions don't match, automatically adapts using the enhanced approach |  | ||||||
|  * @param normalize Optional flag to normalize vectors before comparison (default: false) |  | ||||||
|  * @param sourceModel Optional identifier for the source model |  | ||||||
|  * @param targetModel Optional identifier for the target model |  | ||||||
|  * @param contentType Optional content type for strategy selection |  | ||||||
|  * @param performanceProfile Optional performance profile |  | ||||||
|  */ |  | ||||||
| export function cosineSimilarity( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     normalize: boolean = false, |  | ||||||
|     sourceModel?: string, |  | ||||||
|     targetModel?: string, |  | ||||||
|     contentType?: ContentType, |  | ||||||
|     performanceProfile?: PerformanceProfile |  | ||||||
| ): number { |  | ||||||
|     // Use the enhanced approach that preserves more information |  | ||||||
|     return enhancedCosineSimilarity(a, b, normalize, sourceModel, targetModel, contentType, performanceProfile); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Enhanced cosine similarity that adaptively handles different dimensions |  | ||||||
|  * Instead of truncating larger embeddings, it pads smaller ones to preserve information |  | ||||||
|  * @param normalize Optional flag to normalize vectors before comparison (default: false) |  | ||||||
|  * @param sourceModel Optional identifier for the source model |  | ||||||
|  * @param targetModel Optional identifier for the target model |  | ||||||
|  * @param contentType Optional content type for strategy selection |  | ||||||
|  * @param performanceProfile Optional performance profile |  | ||||||
|  */ |  | ||||||
| export function enhancedCosineSimilarity( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     normalize: boolean = false, |  | ||||||
|     sourceModel?: string, |  | ||||||
|     targetModel?: string, |  | ||||||
|     contentType?: ContentType, |  | ||||||
|     performanceProfile?: PerformanceProfile |  | ||||||
| ): number { |  | ||||||
|     // If normalization is requested, normalize vectors first |  | ||||||
|     if (normalize) { |  | ||||||
|         a = normalizeVector(a); |  | ||||||
|         b = normalizeVector(b); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // If dimensions match, use standard calculation |  | ||||||
|     if (a.length === b.length) { |  | ||||||
|         return standardCosineSimilarity(a, b); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Log dimension adaptation |  | ||||||
|     debugLog(`Dimension mismatch: ${a.length} vs ${b.length}. Adapting dimensions...`, 'info'); |  | ||||||
|  |  | ||||||
|     // Determine if models are different |  | ||||||
|     const isCrossModelComparison = sourceModel !== targetModel && |  | ||||||
|                                   sourceModel !== undefined && |  | ||||||
|                                   targetModel !== undefined; |  | ||||||
|  |  | ||||||
|     // Context for strategy selection |  | ||||||
|     const context: StrategySelectionContext = { |  | ||||||
|         contentType: contentType || ContentType.GENERAL_TEXT, |  | ||||||
|         performanceProfile: performanceProfile || PerformanceProfile.BALANCED, |  | ||||||
|         sourceDimension: a.length, |  | ||||||
|         targetDimension: b.length, |  | ||||||
|         sourceModel, |  | ||||||
|         targetModel, |  | ||||||
|         isCrossModelComparison |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     // Select the optimal strategy based on context |  | ||||||
|     let adaptOptions: AdaptationOptions; |  | ||||||
|  |  | ||||||
|     if (a.length > b.length) { |  | ||||||
|         // Pad b to match a's dimensions |  | ||||||
|         debugLog(`Adapting embedding B (${b.length}D) to match A (${a.length}D)`, 'debug'); |  | ||||||
|  |  | ||||||
|         // Get optimal strategy |  | ||||||
|         adaptOptions = selectOptimalPaddingStrategy(context); |  | ||||||
|         const adaptedB = adaptEmbeddingDimensions(b, a.length, adaptOptions); |  | ||||||
|  |  | ||||||
|         // Record stats |  | ||||||
|         recordAdaptationStats({ |  | ||||||
|             operation: 'dimension_adaptation', |  | ||||||
|             sourceModel: targetModel, |  | ||||||
|             targetModel: sourceModel, |  | ||||||
|             sourceDimension: b.length, |  | ||||||
|             targetDimension: a.length, |  | ||||||
|             strategy: adaptOptions.strategy |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         return standardCosineSimilarity(a, adaptedB); |  | ||||||
|     } else { |  | ||||||
|         // Pad a to match b's dimensions |  | ||||||
|         debugLog(`Adapting embedding A (${a.length}D) to match B (${b.length}D)`, 'debug'); |  | ||||||
|  |  | ||||||
|         // Get optimal strategy |  | ||||||
|         adaptOptions = selectOptimalPaddingStrategy(context); |  | ||||||
|         const adaptedA = adaptEmbeddingDimensions(a, b.length, adaptOptions); |  | ||||||
|  |  | ||||||
|         // Record stats |  | ||||||
|         recordAdaptationStats({ |  | ||||||
|             operation: 'dimension_adaptation', |  | ||||||
|             sourceModel: sourceModel, |  | ||||||
|             targetModel: targetModel, |  | ||||||
|             sourceDimension: a.length, |  | ||||||
|             targetDimension: b.length, |  | ||||||
|             strategy: adaptOptions.strategy |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         return standardCosineSimilarity(adaptedA, b); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Normalizes a vector to unit length |  | ||||||
|  * @param vector The vector to normalize |  | ||||||
|  * @returns A new normalized vector |  | ||||||
|  */ |  | ||||||
| export function normalizeVector(vector: Float32Array): Float32Array { |  | ||||||
|     let magnitude = 0; |  | ||||||
|     for (let i = 0; i < vector.length; i++) { |  | ||||||
|         magnitude += vector[i] * vector[i]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     magnitude = Math.sqrt(magnitude); |  | ||||||
|  |  | ||||||
|     // If vector is already normalized or is a zero vector, return a copy |  | ||||||
|     if (magnitude === 0 || Math.abs(magnitude - 1.0) < 1e-6) { |  | ||||||
|         return new Float32Array(vector); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Create a new normalized vector |  | ||||||
|     const normalized = new Float32Array(vector.length); |  | ||||||
|     for (let i = 0; i < vector.length; i++) { |  | ||||||
|         normalized[i] = vector[i] / magnitude; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return normalized; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Standard cosine similarity for same-dimension vectors |  | ||||||
|  */ |  | ||||||
| function standardCosineSimilarity(a: Float32Array, b: Float32Array): number { |  | ||||||
|     let dotProduct = 0; |  | ||||||
|     let aMagnitude = 0; |  | ||||||
|     let bMagnitude = 0; |  | ||||||
|  |  | ||||||
|     for (let i = 0; i < a.length; i++) { |  | ||||||
|         dotProduct += a[i] * b[i]; |  | ||||||
|         aMagnitude += a[i] * a[i]; |  | ||||||
|         bMagnitude += b[i] * b[i]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     aMagnitude = Math.sqrt(aMagnitude); |  | ||||||
|     bMagnitude = Math.sqrt(bMagnitude); |  | ||||||
|  |  | ||||||
|     if (aMagnitude === 0 || bMagnitude === 0) { |  | ||||||
|         return 0; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return dotProduct / (aMagnitude * bMagnitude); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Identifies the optimal embedding when multiple are available |  | ||||||
|  * Prioritizes higher-dimensional embeddings as they contain more information |  | ||||||
|  */ |  | ||||||
| export function selectOptimalEmbedding(embeddings: Array<{ |  | ||||||
|     providerId: string; |  | ||||||
|     modelId: string; |  | ||||||
|     dimension: number; |  | ||||||
|     count?: number; |  | ||||||
| }>): {providerId: string; modelId: string; dimension: number} | null { |  | ||||||
|     if (!embeddings || embeddings.length === 0) return null; |  | ||||||
|  |  | ||||||
|     // First prioritize by dimension (higher is better) |  | ||||||
|     let optimal = embeddings.reduce((best, current) => |  | ||||||
|         current.dimension > best.dimension ? current : best, |  | ||||||
|         embeddings[0] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return optimal; |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Padding strategy options for dimension adaptation.
 *
 * Consumed by adaptEmbeddingDimensions() when a smaller embedding has to be
 * grown to a larger dimension; the strategy decides how the tail is filled.
 */
export enum PaddingStrategy {
    ZERO = 'zero',               // Simple zero padding (default)
    MEAN = 'mean',               // Padding with mean value of source embedding
    GAUSSIAN = 'gaussian',       // Padding with Gaussian noise based on source statistics
    MIRROR = 'mirror'            // Mirroring existing values for padding
}
|  |  | ||||||
/**
 * Configuration for embedding adaptation (see adaptEmbeddingDimensions).
 */
export interface AdaptationOptions {
    strategy: PaddingStrategy;
    seed?: number;               // Seed for random number generation (gaussian) — NOTE(review): not read by adaptEmbeddingDimensions, which uses Math.random(); confirm intent
    variance?: number;           // Variance for gaussian noise (defaults to min(0.01, sample variance of the source))
    normalize?: boolean;         // Whether to normalize after adaptation
}
|  |  | ||||||
| /** |  | ||||||
|  * Adapts an embedding to match target dimensions with configurable strategies |  | ||||||
|  * |  | ||||||
|  * @param sourceEmbedding The original embedding |  | ||||||
|  * @param targetDimension The desired dimension |  | ||||||
|  * @param options Configuration options for the adaptation |  | ||||||
|  * @returns A new embedding with the target dimensions |  | ||||||
|  */ |  | ||||||
| export function adaptEmbeddingDimensions( |  | ||||||
|     sourceEmbedding: Float32Array, |  | ||||||
|     targetDimension: number, |  | ||||||
|     options: AdaptationOptions = { strategy: PaddingStrategy.ZERO, normalize: true } |  | ||||||
| ): Float32Array { |  | ||||||
|     const sourceDimension = sourceEmbedding.length; |  | ||||||
|  |  | ||||||
|     // If dimensions already match, return a copy of the original |  | ||||||
|     if (sourceDimension === targetDimension) { |  | ||||||
|         return new Float32Array(sourceEmbedding); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Create a new embedding with target dimensions |  | ||||||
|     const adaptedEmbedding = new Float32Array(targetDimension); |  | ||||||
|  |  | ||||||
|     if (sourceDimension < targetDimension) { |  | ||||||
|         // Copy all source values first |  | ||||||
|         adaptedEmbedding.set(sourceEmbedding); |  | ||||||
|  |  | ||||||
|         // Apply the selected padding strategy |  | ||||||
|         switch (options.strategy) { |  | ||||||
|             case PaddingStrategy.ZERO: |  | ||||||
|                 // Zero padding is already done by default |  | ||||||
|                 break; |  | ||||||
|  |  | ||||||
|             case PaddingStrategy.MEAN: |  | ||||||
|                 // Calculate mean of source embedding |  | ||||||
|                 let sum = 0; |  | ||||||
|                 for (let i = 0; i < sourceDimension; i++) { |  | ||||||
|                     sum += sourceEmbedding[i]; |  | ||||||
|                 } |  | ||||||
|                 const mean = sum / sourceDimension; |  | ||||||
|  |  | ||||||
|                 // Fill remaining dimensions with mean value |  | ||||||
|                 for (let i = sourceDimension; i < targetDimension; i++) { |  | ||||||
|                     adaptedEmbedding[i] = mean; |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|  |  | ||||||
|             case PaddingStrategy.GAUSSIAN: |  | ||||||
|                 // Calculate mean and standard deviation of source embedding |  | ||||||
|                 let meanSum = 0; |  | ||||||
|                 for (let i = 0; i < sourceDimension; i++) { |  | ||||||
|                     meanSum += sourceEmbedding[i]; |  | ||||||
|                 } |  | ||||||
|                 const meanValue = meanSum / sourceDimension; |  | ||||||
|  |  | ||||||
|                 let varianceSum = 0; |  | ||||||
|                 for (let i = 0; i < sourceDimension; i++) { |  | ||||||
|                     varianceSum += Math.pow(sourceEmbedding[i] - meanValue, 2); |  | ||||||
|                 } |  | ||||||
|                 const variance = options.variance ?? Math.min(0.01, varianceSum / sourceDimension); |  | ||||||
|                 const stdDev = Math.sqrt(variance); |  | ||||||
|  |  | ||||||
|                 // Fill remaining dimensions with Gaussian noise |  | ||||||
|                 for (let i = sourceDimension; i < targetDimension; i++) { |  | ||||||
|                     // Box-Muller transform for Gaussian distribution |  | ||||||
|                     const u1 = Math.random(); |  | ||||||
|                     const u2 = Math.random(); |  | ||||||
|                     const z0 = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2); |  | ||||||
|  |  | ||||||
|                     adaptedEmbedding[i] = meanValue + stdDev * z0; |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|  |  | ||||||
|             case PaddingStrategy.MIRROR: |  | ||||||
|                 // Mirror existing values for padding |  | ||||||
|                 for (let i = sourceDimension; i < targetDimension; i++) { |  | ||||||
|                     // Cycle through source values in reverse order |  | ||||||
|                     const mirrorIndex = sourceDimension - 1 - ((i - sourceDimension) % sourceDimension); |  | ||||||
|                     adaptedEmbedding[i] = sourceEmbedding[mirrorIndex]; |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|  |  | ||||||
|             default: |  | ||||||
|                 // Default to zero padding |  | ||||||
|                 break; |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         // If source is larger, truncate to target dimension |  | ||||||
|         for (let i = 0; i < targetDimension; i++) { |  | ||||||
|             adaptedEmbedding[i] = sourceEmbedding[i]; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Normalize the adapted embedding if requested |  | ||||||
|     if (options.normalize) { |  | ||||||
|         return normalizeVector(adaptedEmbedding); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return adaptedEmbedding; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Converts embedding Float32Array to Buffer for storage in SQLite |  | ||||||
|  */ |  | ||||||
| export function embeddingToBuffer(embedding: Float32Array): Buffer { |  | ||||||
|     return Buffer.from(embedding.buffer); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Converts Buffer from SQLite back to Float32Array |  | ||||||
|  */ |  | ||||||
| export function bufferToEmbedding(buffer: Buffer, dimension: number): Float32Array { |  | ||||||
|     return new Float32Array(buffer.buffer, buffer.byteOffset, dimension); |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Similarity metric options.
 *
 * Selected via SimilarityOptions.metric and dispatched in computeSimilarity().
 */
export enum SimilarityMetric {
    COSINE = 'cosine',               // Standard cosine similarity
    DOT_PRODUCT = 'dot_product',     // Simple dot product (assumes normalized vectors)
    HYBRID = 'hybrid',               // Dot product + cosine hybrid
    DIM_AWARE = 'dimension_aware',   // Dimension-aware similarity that factors in dimension differences
    ENSEMBLE = 'ensemble'            // Combined score from multiple metrics
}
|  |  | ||||||
/**
 * Configuration for similarity calculation.
 * Only `metric` is required; the remaining fields refine normalization,
 * ensemble weighting, and cross-model dimension adaptation.
 */
export interface SimilarityOptions {
    metric: SimilarityMetric;  // Which similarity algorithm to use
    normalize?: boolean;       // Whether to normalize vectors before comparison (default: false)
    ensembleWeights?: {[key in SimilarityMetric]?: number}; // Per-metric weights for ENSEMBLE scoring
    dimensionPenalty?: number; // Penalty factor for dimension differences (0 to 1, default 0.1)
    sourceModel?: string;      // Source model identifier
    targetModel?: string;      // Target model identifier
    contentType?: ContentType; // Type of content being compared
    performanceProfile?: PerformanceProfile; // Performance requirements
}
|  |  | ||||||
| /** |  | ||||||
|  * Computes similarity between two vectors using the specified metric |  | ||||||
|  * @param a First vector |  | ||||||
|  * @param b Second vector |  | ||||||
|  * @param options Similarity calculation options |  | ||||||
|  */ |  | ||||||
| export function computeSimilarity( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     options: SimilarityOptions = { metric: SimilarityMetric.COSINE } |  | ||||||
| ): number { |  | ||||||
|     // Apply normalization if requested |  | ||||||
|     const normalize = options.normalize ?? false; |  | ||||||
|  |  | ||||||
|     switch (options.metric) { |  | ||||||
|         case SimilarityMetric.COSINE: |  | ||||||
|             return cosineSimilarity( |  | ||||||
|                 a, b, normalize, |  | ||||||
|                 options.sourceModel, options.targetModel, |  | ||||||
|                 options.contentType, options.performanceProfile |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|         case SimilarityMetric.DOT_PRODUCT: |  | ||||||
|             // Dot product assumes normalized vectors for proper similarity measurement |  | ||||||
|             const aNorm = normalize ? normalizeVector(a) : a; |  | ||||||
|             const bNorm = normalize ? normalizeVector(b) : b; |  | ||||||
|             return computeDotProduct(aNorm, bNorm, options); |  | ||||||
|  |  | ||||||
|         case SimilarityMetric.HYBRID: |  | ||||||
|             // Hybrid approach combines dot product with cosine similarity |  | ||||||
|             // More robust against small perturbations while maintaining angle sensitivity |  | ||||||
|             return hybridSimilarity(a, b, normalize, options); |  | ||||||
|  |  | ||||||
|         case SimilarityMetric.DIM_AWARE: |  | ||||||
|             // Dimension-aware similarity that factors in dimension differences |  | ||||||
|             return dimensionAwareSimilarity( |  | ||||||
|                 a, b, normalize, |  | ||||||
|                 options.dimensionPenalty ?? 0.1, |  | ||||||
|                 options.contentType, |  | ||||||
|                 options.performanceProfile |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|         case SimilarityMetric.ENSEMBLE: |  | ||||||
|             // Ensemble scoring combines multiple metrics with weights |  | ||||||
|             return ensembleSimilarity(a, b, options); |  | ||||||
|  |  | ||||||
|         default: |  | ||||||
|             // Default to cosine similarity |  | ||||||
|             return cosineSimilarity( |  | ||||||
|                 a, b, normalize, |  | ||||||
|                 options.sourceModel, options.targetModel, |  | ||||||
|                 options.contentType, options.performanceProfile |  | ||||||
|             ); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Computes dot product between two vectors |  | ||||||
|  */ |  | ||||||
| export function computeDotProduct( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     options?: Pick<SimilarityOptions, 'contentType' | 'performanceProfile' | 'sourceModel' | 'targetModel'> |  | ||||||
| ): number { |  | ||||||
|     // Adapt dimensions if needed |  | ||||||
|     if (a.length !== b.length) { |  | ||||||
|         // Create context for strategy selection if dimensions don't match |  | ||||||
|         if (options) { |  | ||||||
|             const context: StrategySelectionContext = { |  | ||||||
|                 contentType: options.contentType || ContentType.GENERAL_TEXT, |  | ||||||
|                 performanceProfile: options.performanceProfile || PerformanceProfile.BALANCED, |  | ||||||
|                 sourceDimension: a.length, |  | ||||||
|                 targetDimension: b.length, |  | ||||||
|                 sourceModel: options.sourceModel, |  | ||||||
|                 targetModel: options.targetModel, |  | ||||||
|                 isCrossModelComparison: options.sourceModel !== options.targetModel && |  | ||||||
|                                       options.sourceModel !== undefined && |  | ||||||
|                                       options.targetModel !== undefined |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             if (a.length > b.length) { |  | ||||||
|                 const adaptOptions = selectOptimalPaddingStrategy(context); |  | ||||||
|                 b = adaptEmbeddingDimensions(b, a.length, adaptOptions); |  | ||||||
|             } else { |  | ||||||
|                 const adaptOptions = selectOptimalPaddingStrategy(context); |  | ||||||
|                 a = adaptEmbeddingDimensions(a, b.length, adaptOptions); |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             // Default behavior without options |  | ||||||
|             if (a.length > b.length) { |  | ||||||
|                 b = adaptEmbeddingDimensions(b, a.length); |  | ||||||
|             } else { |  | ||||||
|                 a = adaptEmbeddingDimensions(a, b.length); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     let dotProduct = 0; |  | ||||||
|     for (let i = 0; i < a.length; i++) { |  | ||||||
|         dotProduct += a[i] * b[i]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return dotProduct; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Hybrid similarity combines dot product and cosine similarity |  | ||||||
|  * Provides robustness against small perturbations while maintaining angle sensitivity |  | ||||||
|  */ |  | ||||||
| export function hybridSimilarity( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     normalize: boolean = false, |  | ||||||
|     options?: Pick<SimilarityOptions, 'contentType' | 'performanceProfile' | 'sourceModel' | 'targetModel'> |  | ||||||
| ): number { |  | ||||||
|     // Get cosine similarity with full options |  | ||||||
|     const cosine = cosineSimilarity( |  | ||||||
|         a, b, normalize, |  | ||||||
|         options?.sourceModel, options?.targetModel, |  | ||||||
|         options?.contentType, options?.performanceProfile |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     // For dot product, we should always normalize |  | ||||||
|     const aNorm = normalize ? a : normalizeVector(a); |  | ||||||
|     const bNorm = normalize ? b : normalizeVector(b); |  | ||||||
|  |  | ||||||
|     // If dimensions don't match, adapt with optimal strategy |  | ||||||
|     let adaptedA = aNorm; |  | ||||||
|     let adaptedB = bNorm; |  | ||||||
|  |  | ||||||
|     if (aNorm.length !== bNorm.length) { |  | ||||||
|         // Use optimal padding strategy |  | ||||||
|         if (options) { |  | ||||||
|             const context: StrategySelectionContext = { |  | ||||||
|                 contentType: options.contentType || ContentType.GENERAL_TEXT, |  | ||||||
|                 performanceProfile: options.performanceProfile || PerformanceProfile.BALANCED, |  | ||||||
|                 sourceDimension: aNorm.length, |  | ||||||
|                 targetDimension: bNorm.length, |  | ||||||
|                 sourceModel: options.sourceModel, |  | ||||||
|                 targetModel: options.targetModel, |  | ||||||
|                 isCrossModelComparison: options.sourceModel !== options.targetModel && |  | ||||||
|                                       options.sourceModel !== undefined && |  | ||||||
|                                       options.targetModel !== undefined |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             if (aNorm.length < bNorm.length) { |  | ||||||
|                 const adaptOptions = selectOptimalPaddingStrategy(context); |  | ||||||
|                 adaptedA = adaptEmbeddingDimensions(aNorm, bNorm.length, adaptOptions); |  | ||||||
|             } else { |  | ||||||
|                 const adaptOptions = selectOptimalPaddingStrategy(context); |  | ||||||
|                 adaptedB = adaptEmbeddingDimensions(bNorm, aNorm.length, adaptOptions); |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             // Default behavior |  | ||||||
|             adaptedA = aNorm.length < bNorm.length ? adaptEmbeddingDimensions(aNorm, bNorm.length) : aNorm; |  | ||||||
|             adaptedB = bNorm.length < aNorm.length ? adaptEmbeddingDimensions(bNorm, aNorm.length) : bNorm; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Compute dot product (should be similar to cosine for normalized vectors) |  | ||||||
|     const dot = computeDotProduct(adaptedA, adaptedB, options); |  | ||||||
|  |  | ||||||
|     // Return weighted average - giving more weight to cosine |  | ||||||
|     return 0.7 * cosine + 0.3 * dot; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Dimension-aware similarity that factors in dimension differences |  | ||||||
|  * @param dimensionPenalty Penalty factor for dimension differences (0 to 1) |  | ||||||
|  */ |  | ||||||
| export function dimensionAwareSimilarity( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     normalize: boolean = false, |  | ||||||
|     dimensionPenalty: number = 0.1, |  | ||||||
|     contentType?: ContentType, |  | ||||||
|     performanceProfile?: PerformanceProfile |  | ||||||
| ): number { |  | ||||||
|     // Basic cosine similarity with content type information |  | ||||||
|     const cosine = cosineSimilarity(a, b, normalize, undefined, undefined, contentType, performanceProfile); |  | ||||||
|  |  | ||||||
|     // If dimensions match, return standard cosine |  | ||||||
|     if (a.length === b.length) { |  | ||||||
|         return cosine; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Calculate dimension penalty |  | ||||||
|     // This penalizes vectors with very different dimensions |  | ||||||
|     const dimRatio = Math.min(a.length, b.length) / Math.max(a.length, b.length); |  | ||||||
|     const penalty = 1 - dimensionPenalty * (1 - dimRatio); |  | ||||||
|  |  | ||||||
|     // Apply penalty to similarity score |  | ||||||
|     return cosine * penalty; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Ensemble similarity combines multiple metrics with weights |  | ||||||
|  */ |  | ||||||
| export function ensembleSimilarity( |  | ||||||
|     a: Float32Array, |  | ||||||
|     b: Float32Array, |  | ||||||
|     options: SimilarityOptions |  | ||||||
| ): number { |  | ||||||
|     // Default weights if not provided |  | ||||||
|     const weights = options.ensembleWeights ?? { |  | ||||||
|         [SimilarityMetric.COSINE]: SEARCH_CONSTANTS.VECTOR_SEARCH.SIMILARITY_THRESHOLD.COSINE, |  | ||||||
|         [SimilarityMetric.HYBRID]: SEARCH_CONSTANTS.VECTOR_SEARCH.SIMILARITY_THRESHOLD.HYBRID, |  | ||||||
|         [SimilarityMetric.DIM_AWARE]: SEARCH_CONSTANTS.VECTOR_SEARCH.SIMILARITY_THRESHOLD.DIM_AWARE |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let totalWeight = 0; |  | ||||||
|     let weightedSum = 0; |  | ||||||
|  |  | ||||||
|     // Compute each metric and apply weight |  | ||||||
|     for (const [metricStr, weight] of Object.entries(weights)) { |  | ||||||
|         const metric = metricStr as SimilarityMetric; |  | ||||||
|         if (weight && weight > 0) { |  | ||||||
|             // Skip the ensemble itself to avoid recursion |  | ||||||
|             if (metric !== SimilarityMetric.ENSEMBLE) { |  | ||||||
|                 const similarity = computeSimilarity(a, b, { |  | ||||||
|                     metric, |  | ||||||
|                     normalize: options.normalize |  | ||||||
|                 }); |  | ||||||
|  |  | ||||||
|                 weightedSum += similarity * weight; |  | ||||||
|                 totalWeight += weight; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Normalize by total weight |  | ||||||
|     return totalWeight > 0 ? weightedSum / totalWeight : cosineSimilarity(a, b, options.normalize); |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Debug configuration for vector operations.
 * Consulted on every debugLog/recordAdaptationStats call.
 */
export interface DebugConfig {
    enabled: boolean; // Master switch; when false nothing is logged or recorded
    logLevel: 'info' | 'debug' | 'warning' | 'error'; // Minimum level that gets emitted by debugLog
    recordStats: boolean; // Whether adaptation statistics are appended to adaptationStats
}
|  |  | ||||||
/**
 * Global debug configuration, can be modified at runtime.
 * Debugging is fully disabled by default.
 */
export const vectorDebugConfig: DebugConfig = {
    enabled: false,
    logLevel: 'info',
    recordStats: false
};
|  |  | ||||||
/**
 * Statistics collected during vector operations
 * (appended by recordAdaptationStats when stats recording is enabled).
 */
export interface AdaptationStats {
    timestamp: number; // Epoch millis when the entry was recorded
    operation: string; // Name of the operation being recorded
    sourceModel?: string; // Source model identifier, if known
    targetModel?: string; // Target model identifier, if known
    sourceDimension: number; // Dimension of the source embedding
    targetDimension: number; // Dimension of the target embedding
    strategy: string; // Padding/adaptation strategy that was applied
    similarity?: number; // Resulting similarity score, if one was computed
}
|  |  | ||||||
// Collection of adaptation statistics for quality auditing.
// Capped at 1000 entries (oldest evicted first) to bound memory use.
export const adaptationStats: AdaptationStats[] = [];
|  |  | ||||||
| /** |  | ||||||
|  * Log a message if debugging is enabled |  | ||||||
|  */ |  | ||||||
| function debugLog( |  | ||||||
|     message: string, |  | ||||||
|     level: 'info' | 'debug' | 'warning' | 'error' = 'info' |  | ||||||
| ): void { |  | ||||||
|     if (vectorDebugConfig.enabled) { |  | ||||||
|         const levelOrder = { 'debug': 0, 'info': 1, 'warning': 2, 'error': 3 }; |  | ||||||
|  |  | ||||||
|         if (levelOrder[level] >= levelOrder[vectorDebugConfig.logLevel]) { |  | ||||||
|             const prefix = `[VectorUtils:${level.toUpperCase()}]`; |  | ||||||
|  |  | ||||||
|             switch (level) { |  | ||||||
|                 case 'error': |  | ||||||
|                     console.error(prefix, message); |  | ||||||
|                     break; |  | ||||||
|                 case 'warning': |  | ||||||
|                     console.warn(prefix, message); |  | ||||||
|                     break; |  | ||||||
|                 case 'debug': |  | ||||||
|                     console.debug(prefix, message); |  | ||||||
|                     break; |  | ||||||
|                 default: |  | ||||||
|                     console.log(prefix, message); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Record adaptation statistics if enabled |  | ||||||
|  */ |  | ||||||
| function recordAdaptationStats(stats: Omit<AdaptationStats, 'timestamp'>): void { |  | ||||||
|     if (vectorDebugConfig.enabled && vectorDebugConfig.recordStats) { |  | ||||||
|         adaptationStats.push({ |  | ||||||
|             ...stats, |  | ||||||
|             timestamp: Date.now() |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Keep only the last 1000 stats to prevent memory issues |  | ||||||
|         if (adaptationStats.length > 1000) { |  | ||||||
|             adaptationStats.shift(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Content types for embedding adaptation strategy selection
 * (see detectContentType and selectOptimalPaddingStrategy).
 */
export enum ContentType {
    GENERAL_TEXT = 'general_text', // Plain prose; the default when nothing else matches
    CODE = 'code', // Source code (mime- or pattern-detected)
    STRUCTURED_DATA = 'structured_data', // XML/CSV/SQL/JSON-like data
    MATHEMATICAL = 'mathematical', // LaTeX/MathML content
    MIXED = 'mixed' // Prose containing fenced code blocks
}
|  |  | ||||||
/**
 * Performance profile for selecting adaptation strategy —
 * the quality/speed trade-off the caller is willing to make.
 */
export enum PerformanceProfile {
    MAXIMUM_QUALITY = 'maximum_quality',   // Prioritize similarity quality over speed
    BALANCED = 'balanced',                 // Balance quality and performance
    MAXIMUM_SPEED = 'maximum_speed'        // Prioritize speed over quality
}
|  |  | ||||||
/**
 * Context for selecting the optimal padding strategy.
 * Only the source/target dimensions are required; the remaining fields
 * refine the choice made by selectOptimalPaddingStrategy.
 */
export interface StrategySelectionContext {
    contentType?: ContentType;                 // Type of content being compared (defaults to GENERAL_TEXT)
    performanceProfile?: PerformanceProfile;   // Performance requirements (defaults to BALANCED)
    sourceDimension: number;                   // Source embedding dimension
    targetDimension: number;                   // Target embedding dimension
    sourceModel?: string;                      // Source model identifier
    targetModel?: string;                      // Target model identifier
    isHighPrecisionRequired?: boolean;         // Whether high precision is needed
    isCrossModelComparison?: boolean;          // Whether comparing across different models
    dimensionRatio?: number;                   // Custom threshold for "significant" dimension difference (default 0.5)
}
|  |  | ||||||
/**
 * Selects the optimal padding strategy based on content type and performance considerations.
 *
 * Decision pipeline (later stages override earlier ones):
 *   1. Base strategy chosen from the content type.
 *   2. Override when the dimension ratio is below the significance threshold
 *      and the performance profile demands quality or speed.
 *   3. Trivial dimension differences (<= 5) always fall back to zero padding.
 *
 * @param context Selection context parameters
 * @returns The most appropriate padding strategy and options
 */
export function selectOptimalPaddingStrategy(
    context: StrategySelectionContext
): AdaptationOptions {
    const {
        contentType = ContentType.GENERAL_TEXT,
        performanceProfile = PerformanceProfile.BALANCED,
        sourceDimension,
        targetDimension,
        isHighPrecisionRequired = false,
        isCrossModelComparison = false
    } = context;

    // Calculate dimension ratio (1.0 means equal dimensions)
    const dimRatio = Math.min(sourceDimension, targetDimension) /
                     Math.max(sourceDimension, targetDimension);

    // Default options: zero padding with normalization enabled
    const options: AdaptationOptions = {
        strategy: PaddingStrategy.ZERO,
        normalize: true
    };

    // Significant dimension difference detection (caller may tune the threshold
    // via context.dimensionRatio; defaults to 0.5)
    const hasSignificantDimDifference = dimRatio < (context.dimensionRatio || 0.5);

    // Stage 1: select strategy based on content type
    switch (contentType) {
        case ContentType.CODE:
            // Code benefits from structural patterns
            options.strategy = PaddingStrategy.MIRROR;
            break;

        case ContentType.STRUCTURED_DATA:
            // Structured data works well with mean-value padding
            options.strategy = PaddingStrategy.MEAN;
            break;

        case ContentType.MATHEMATICAL:
            // Mathematical content benefits from gaussian noise to maintain statistical properties
            options.strategy = PaddingStrategy.GAUSSIAN;
            options.variance = 0.005; // Lower variance for mathematical precision
            break;

        case ContentType.MIXED:
            // For mixed content, choose based on performance profile
            if (performanceProfile === PerformanceProfile.MAXIMUM_QUALITY) {
                options.strategy = PaddingStrategy.GAUSSIAN;
            } else if (performanceProfile === PerformanceProfile.MAXIMUM_SPEED) {
                options.strategy = PaddingStrategy.ZERO;
            } else {
                options.strategy = PaddingStrategy.MEAN;
            }
            break;

        case ContentType.GENERAL_TEXT:
        default:
            // For general text, base the decision on precision/cross-model hints
            if (isHighPrecisionRequired) {
                options.strategy = PaddingStrategy.GAUSSIAN;
            } else if (isCrossModelComparison) {
                options.strategy = PaddingStrategy.MEAN;
            } else {
                options.strategy = PaddingStrategy.ZERO;
            }
            break;
    }

    // Stage 2: override based on performance profile when the dimension
    // difference is significant
    if (hasSignificantDimDifference) {
        // For extreme dimension differences, specialized handling
        if (performanceProfile === PerformanceProfile.MAXIMUM_QUALITY) {
            // For quality, use gaussian noise for better statistical matching
            options.strategy = PaddingStrategy.GAUSSIAN;
            // Adjust variance based on dimension ratio
            options.variance = Math.min(0.01, 0.02 * dimRatio);

            // Log the significant dimension adaptation
            debugLog(`Significant dimension difference detected: ${sourceDimension} vs ${targetDimension}. ` +
                     `Ratio: ${dimRatio.toFixed(2)}. Using Gaussian strategy.`, 'warning');
        } else if (performanceProfile === PerformanceProfile.MAXIMUM_SPEED) {
            // For speed, stick with zero padding
            options.strategy = PaddingStrategy.ZERO;
        }
    }

    // Stage 3: always use zero padding for trivial dimension differences
    // (e.g. 1536 vs 1537) for performance reasons
    if (Math.abs(sourceDimension - targetDimension) <= 5) {
        options.strategy = PaddingStrategy.ZERO;
    }

    // Log the selected strategy
    debugLog(`Selected padding strategy: ${options.strategy} for ` +
             `content type: ${contentType}, performance profile: ${performanceProfile}`, 'debug');

    return options;
}
|  |  | ||||||
| /** |  | ||||||
|  * Helper function to determine content type from note context |  | ||||||
|  * @param context The note context information |  | ||||||
|  * @returns The detected content type |  | ||||||
|  */ |  | ||||||
| export function detectContentType(mime: string, content?: string): ContentType { |  | ||||||
|     // Detect based on mime type |  | ||||||
|     if (mime.includes('code') || |  | ||||||
|         mime.includes('javascript') || |  | ||||||
|         mime.includes('typescript') || |  | ||||||
|         mime.includes('python') || |  | ||||||
|         mime.includes('java') || |  | ||||||
|         mime.includes('c++') || |  | ||||||
|         mime.includes('json')) { |  | ||||||
|         return ContentType.CODE; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (mime.includes('xml') || |  | ||||||
|         mime.includes('csv') || |  | ||||||
|         mime.includes('sql') || |  | ||||||
|         mime.endsWith('+json')) { |  | ||||||
|         return ContentType.STRUCTURED_DATA; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (mime.includes('latex') || |  | ||||||
|         mime.includes('mathml') || |  | ||||||
|         mime.includes('tex')) { |  | ||||||
|         return ContentType.MATHEMATICAL; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // If we have content, we can do deeper analysis |  | ||||||
|     if (content) { |  | ||||||
|         // Detect code by looking for common patterns |  | ||||||
|         const codePatterns = [ |  | ||||||
|             /function\s+\w+\s*\(.*\)\s*{/,  // JavaScript/TypeScript function |  | ||||||
|             /def\s+\w+\s*\(.*\):/,          // Python function |  | ||||||
|             /class\s+\w+(\s+extends\s+\w+)?(\s+implements\s+\w+)?\s*{/, // Java/TypeScript class |  | ||||||
|             /import\s+.*\s+from\s+['"]/,    // JS/TS import |  | ||||||
|             /^\s*```\w+/m                    // Markdown code block |  | ||||||
|         ]; |  | ||||||
|  |  | ||||||
|         if (codePatterns.some(pattern => pattern.test(content))) { |  | ||||||
|             return ContentType.CODE; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Detect structured data |  | ||||||
|         const structuredPatterns = [ |  | ||||||
|             /^\s*[{\[]/,                     // JSON-like start |  | ||||||
|             /^\s*<\?xml/,                    // XML declaration |  | ||||||
|             /^\s*<[a-z]+>/i,                 // HTML/XML tag |  | ||||||
|             /^\s*(\w+,)+\w+$/m,              // CSV-like |  | ||||||
|             /CREATE\s+TABLE|SELECT\s+.*\s+FROM/i  // SQL |  | ||||||
|         ]; |  | ||||||
|  |  | ||||||
|         if (structuredPatterns.some(pattern => pattern.test(content))) { |  | ||||||
|             return ContentType.STRUCTURED_DATA; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Detect mathematical content |  | ||||||
|         const mathPatterns = [ |  | ||||||
|             /\$\$.*\$\$/s,                   // LaTeX block |  | ||||||
|             /\\begin{equation}/,             // LaTeX equation environment |  | ||||||
|             /\\sum|\\int|\\frac|\\sqrt/,     // Common LaTeX math commands |  | ||||||
|         ]; |  | ||||||
|  |  | ||||||
|         if (mathPatterns.some(pattern => pattern.test(content))) { |  | ||||||
|             return ContentType.MATHEMATICAL; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check for mixed content |  | ||||||
|         const hasMixedContent = |  | ||||||
|             (codePatterns.some(pattern => pattern.test(content)) && |  | ||||||
|              content.split(/\s+/).length > 100) || // Code and substantial text |  | ||||||
|             (content.includes('```') && |  | ||||||
|              content.replace(/```.*?```/gs, '').length > 200); // Markdown with code blocks and text |  | ||||||
|  |  | ||||||
|         if (hasMixedContent) { |  | ||||||
|             return ContentType.MIXED; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Default to general text |  | ||||||
|     return ContentType.GENERAL_TEXT; |  | ||||||
| } |  | ||||||
| @@ -1,970 +0,0 @@ | |||||||
| /** |  | ||||||
|  * LLM Index Service |  | ||||||
|  * |  | ||||||
|  * Centralized service for managing knowledge base indexing for LLM features. |  | ||||||
|  * This service coordinates: |  | ||||||
|  * - Note embedding generation and management |  | ||||||
|  * - Smart context retrieval for LLM queries |  | ||||||
|  * - Progressive indexing of the knowledge base |  | ||||||
|  * - Optimization of the semantic search capabilities |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import log from "../log.js"; |  | ||||||
| import options from "../options.js"; |  | ||||||
| import becca from "../../becca/becca.js"; |  | ||||||
| import beccaLoader from "../../becca/becca_loader.js"; |  | ||||||
| import vectorStore from "./embeddings/index.js"; |  | ||||||
| import providerManager from "./providers/providers.js"; |  | ||||||
| import { ContextExtractor } from "./context/index.js"; |  | ||||||
| import eventService from "../events.js"; |  | ||||||
| import sql from "../sql.js"; |  | ||||||
| import sqlInit from "../sql_init.js"; |  | ||||||
| import { CONTEXT_PROMPTS } from './constants/llm_prompt_constants.js'; |  | ||||||
| import { SEARCH_CONSTANTS } from './constants/search_constants.js'; |  | ||||||
| import { isNoteExcludedFromAI } from "./utils/ai_exclusion_utils.js"; |  | ||||||
| import { hasWorkingEmbeddingProviders } from "./provider_validation.js"; |  | ||||||
|  |  | ||||||
| export class IndexService { |  | ||||||
|     private initialized = false; |  | ||||||
|     private indexingInProgress = false; |  | ||||||
|     private contextExtractor = new ContextExtractor(); |  | ||||||
|     private automaticIndexingInterval?: NodeJS.Timeout; |  | ||||||
|  |  | ||||||
|     // Index rebuilding tracking |  | ||||||
|     private indexRebuildInProgress = false; |  | ||||||
|     private indexRebuildProgress = 0; |  | ||||||
|     private indexRebuildTotal = 0; |  | ||||||
|     private indexRebuildCurrent = 0; |  | ||||||
|  |  | ||||||
|     // Configuration |  | ||||||
|     private defaultQueryDepth = SEARCH_CONSTANTS.HIERARCHY.DEFAULT_QUERY_DEPTH; |  | ||||||
|     private maxNotesPerQuery = SEARCH_CONSTANTS.HIERARCHY.MAX_NOTES_PER_QUERY; |  | ||||||
|     private defaultSimilarityThreshold = SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD; |  | ||||||
|     private indexUpdateInterval = 3600000; // 1 hour in milliseconds |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Initialize the index service |  | ||||||
|      */ |  | ||||||
|     async initialize() { |  | ||||||
|         if (this.initialized) return; |  | ||||||
|  |  | ||||||
|         // Setup event listeners for note changes |  | ||||||
|         this.setupEventListeners(); |  | ||||||
|  |  | ||||||
|         // Setup automatic indexing if enabled |  | ||||||
|         if (await options.getOptionBool('embeddingAutoUpdateEnabled')) { |  | ||||||
|             this.setupAutomaticIndexing(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         this.initialized = true; |  | ||||||
|         log.info("Index service initialized"); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Setup event listeners for index updates |  | ||||||
|      */ |  | ||||||
|     private setupEventListeners() { |  | ||||||
|         // Listen for note content changes |  | ||||||
|         eventService.subscribe(eventService.NOTE_CONTENT_CHANGE, async ({ entity }) => { |  | ||||||
|             if (entity && entity.noteId) { |  | ||||||
|                 // Always queue notes for indexing, but the actual processing will depend on configuration |  | ||||||
|                 await this.queueNoteForIndexing(entity.noteId); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Listen for new notes |  | ||||||
|         eventService.subscribe(eventService.ENTITY_CREATED, async ({ entityName, entity }) => { |  | ||||||
|             if (entityName === "notes" && entity && entity.noteId) { |  | ||||||
|                 await this.queueNoteForIndexing(entity.noteId); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Listen for note title changes |  | ||||||
|         eventService.subscribe(eventService.NOTE_TITLE_CHANGED, async ({ noteId }) => { |  | ||||||
|             if (noteId) { |  | ||||||
|                 await this.queueNoteForIndexing(noteId); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Listen for changes in AI settings |  | ||||||
|         eventService.subscribe(eventService.ENTITY_CHANGED, async ({ entityName, entity }) => { |  | ||||||
|             if (entityName === "options" && entity && entity.name) { |  | ||||||
|                 if (entity.name.startsWith('ai') || entity.name.startsWith('embedding')) { |  | ||||||
|                     log.info("AI settings changed, updating index service configuration"); |  | ||||||
|                     await this.updateConfiguration(); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Set up automatic indexing of notes |  | ||||||
|      */ |  | ||||||
|     private setupAutomaticIndexing() { |  | ||||||
|         // Clear existing interval if any |  | ||||||
|         if (this.automaticIndexingInterval) { |  | ||||||
|             clearInterval(this.automaticIndexingInterval); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Create new interval |  | ||||||
|         this.automaticIndexingInterval = setInterval(async () => { |  | ||||||
|             try { |  | ||||||
|                 if (!this.indexingInProgress) { |  | ||||||
|                     await this.runBatchIndexing(50); // Processing logic handles sync server checks |  | ||||||
|                 } |  | ||||||
|             } catch (error: any) { |  | ||||||
|                 log.error(`Error in automatic indexing: ${error.message || "Unknown error"}`); |  | ||||||
|             } |  | ||||||
|         }, this.indexUpdateInterval); |  | ||||||
|  |  | ||||||
|         log.info("Automatic indexing scheduled"); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Update service configuration from options |  | ||||||
|      */ |  | ||||||
|     private async updateConfiguration() { |  | ||||||
|         try { |  | ||||||
|             // Update indexing interval |  | ||||||
|             const intervalMs = parseInt(await options.getOption('embeddingUpdateInterval') || '3600000', 10); |  | ||||||
|             this.indexUpdateInterval = intervalMs; |  | ||||||
|  |  | ||||||
|             // Check if this instance should process embeddings |  | ||||||
|             const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|             const isSyncServer = await this.isSyncServerForEmbeddings(); |  | ||||||
|             const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer; |  | ||||||
|  |  | ||||||
|             // Update automatic indexing setting |  | ||||||
|             const autoIndexing = await options.getOptionBool('embeddingAutoUpdateEnabled'); |  | ||||||
|             if (autoIndexing && shouldProcessEmbeddings && !this.automaticIndexingInterval) { |  | ||||||
|                 this.setupAutomaticIndexing(); |  | ||||||
|                 log.info(`Index service: Automatic indexing enabled, processing embeddings ${isSyncServer ? 'as sync server' : 'as client'}`); |  | ||||||
|             } else if (autoIndexing && !shouldProcessEmbeddings && this.automaticIndexingInterval) { |  | ||||||
|                 clearInterval(this.automaticIndexingInterval); |  | ||||||
|                 this.automaticIndexingInterval = undefined; |  | ||||||
|                 log.info("Index service: Automatic indexing disabled for this instance based on configuration"); |  | ||||||
|             } else if (!autoIndexing && this.automaticIndexingInterval) { |  | ||||||
|                 clearInterval(this.automaticIndexingInterval); |  | ||||||
|                 this.automaticIndexingInterval = undefined; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Update similarity threshold |  | ||||||
|             const similarityThreshold = await options.getOption('embeddingSimilarityThreshold'); |  | ||||||
|             this.defaultSimilarityThreshold = parseFloat(similarityThreshold || '0.65'); |  | ||||||
|  |  | ||||||
|             // Update max notes per query |  | ||||||
|             const maxNotesPerQuery = await options.getOption('maxNotesPerLlmQuery'); |  | ||||||
|             this.maxNotesPerQuery = parseInt(maxNotesPerQuery || '10', 10); |  | ||||||
|  |  | ||||||
|             log.info("Index service configuration updated"); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error updating index service configuration: ${error.message || "Unknown error"}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Queue a note for indexing |  | ||||||
|      */ |  | ||||||
|     async queueNoteForIndexing(noteId: string, priority = false) { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Always queue notes for indexing, regardless of where embedding generation happens |  | ||||||
|             // The actual processing will be determined when the queue is processed |  | ||||||
|             await vectorStore.queueNoteForEmbedding(noteId, 'UPDATE'); |  | ||||||
|             return true; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error queueing note ${noteId} for indexing: ${error.message || "Unknown error"}`); |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Start full knowledge base indexing |  | ||||||
|      * @param force - Whether to force reindexing of all notes |  | ||||||
|      */ |  | ||||||
|     async startFullIndexing(force = false) { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (this.indexingInProgress) { |  | ||||||
|             throw new Error("Indexing already in progress"); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Check if this instance should process embeddings |  | ||||||
|             const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|             const isSyncServer = await this.isSyncServerForEmbeddings(); |  | ||||||
|             const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer; |  | ||||||
|  |  | ||||||
|             if (!shouldProcessEmbeddings) { |  | ||||||
|                 // This instance is not configured to process embeddings |  | ||||||
|                 log.info("Skipping full indexing as this instance is not configured to process embeddings"); |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             this.indexingInProgress = true; |  | ||||||
|             this.indexRebuildInProgress = true; |  | ||||||
|             this.indexRebuildProgress = 0; |  | ||||||
|             this.indexRebuildCurrent = 0; |  | ||||||
|  |  | ||||||
|             // Reset index rebuild progress |  | ||||||
|             const totalEmbeddings = await sql.getValue("SELECT COUNT(*) FROM note_embeddings") as number; |  | ||||||
|  |  | ||||||
|             if (totalEmbeddings === 0) { |  | ||||||
|                 // If there are no embeddings yet, we need to create them first |  | ||||||
|                 const totalNotes = await sql.getValue("SELECT COUNT(*) FROM notes WHERE isDeleted = 0") as number; |  | ||||||
|                 this.indexRebuildTotal = totalNotes; |  | ||||||
|  |  | ||||||
|                 log.info("No embeddings found, starting full embedding generation first"); |  | ||||||
|                 await this.reprocessAllNotes(); |  | ||||||
|                 log.info("Full embedding generation initiated"); |  | ||||||
|             } else { |  | ||||||
|                 // For index rebuild, use the number of embeddings as the total |  | ||||||
|                 this.indexRebuildTotal = totalEmbeddings; |  | ||||||
|  |  | ||||||
|                 if (force) { |  | ||||||
|                     // Use the new rebuildSearchIndex function that doesn't regenerate embeddings |  | ||||||
|                     log.info("Starting forced index rebuild without regenerating embeddings"); |  | ||||||
|                     setTimeout(async () => { |  | ||||||
|                         try { |  | ||||||
|                             await vectorStore.rebuildSearchIndex(); |  | ||||||
|                             this.indexRebuildInProgress = false; |  | ||||||
|                             this.indexRebuildProgress = 100; |  | ||||||
|                             log.info("Index rebuild completed successfully"); |  | ||||||
|                         } catch (error: any) { |  | ||||||
|                             log.error(`Error during index rebuild: ${error.message || "Unknown error"}`); |  | ||||||
|                             this.indexRebuildInProgress = false; |  | ||||||
|                         } |  | ||||||
|                     }, 0); |  | ||||||
|                 } else { |  | ||||||
|                     // Check current stats |  | ||||||
|                     const stats = await vectorStore.getEmbeddingStats(); |  | ||||||
|  |  | ||||||
|                     // Only start indexing if we're below 90% completion or if embeddings exist but need optimization |  | ||||||
|                     if (stats.percentComplete < 90) { |  | ||||||
|                         log.info("Embedding coverage below 90%, starting full embedding generation"); |  | ||||||
|                         await this.reprocessAllNotes(); |  | ||||||
|                         log.info("Full embedding generation initiated"); |  | ||||||
|                     } else { |  | ||||||
|                         log.info(`Embedding coverage at ${stats.percentComplete}%, starting index optimization`); |  | ||||||
|                         setTimeout(async () => { |  | ||||||
|                             try { |  | ||||||
|                                 await vectorStore.rebuildSearchIndex(); |  | ||||||
|                                 this.indexRebuildInProgress = false; |  | ||||||
|                                 this.indexRebuildProgress = 100; |  | ||||||
|                                 log.info("Index optimization completed successfully"); |  | ||||||
|                             } catch (error: any) { |  | ||||||
|                                 log.error(`Error during index optimization: ${error.message || "Unknown error"}`); |  | ||||||
|                                 this.indexRebuildInProgress = false; |  | ||||||
|                             } |  | ||||||
|                         }, 0); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return true; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error starting full indexing: ${error.message || "Unknown error"}`); |  | ||||||
|             this.indexRebuildInProgress = false; |  | ||||||
|             return false; |  | ||||||
|         } finally { |  | ||||||
|             this.indexingInProgress = false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Update index rebuild progress |  | ||||||
|      * @param processed - Number of notes processed |  | ||||||
|      */ |  | ||||||
|     updateIndexRebuildProgress(processed: number) { |  | ||||||
|         if (!this.indexRebuildInProgress) return; |  | ||||||
|  |  | ||||||
|         this.indexRebuildCurrent += processed; |  | ||||||
|  |  | ||||||
|         if (this.indexRebuildTotal > 0) { |  | ||||||
|             this.indexRebuildProgress = Math.min( |  | ||||||
|                 Math.round((this.indexRebuildCurrent / this.indexRebuildTotal) * 100), |  | ||||||
|                 100 |  | ||||||
|             ); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (this.indexRebuildCurrent >= this.indexRebuildTotal) { |  | ||||||
|             this.indexRebuildInProgress = false; |  | ||||||
|             this.indexRebuildProgress = 100; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the current index rebuild progress |  | ||||||
|      */ |  | ||||||
|     getIndexRebuildStatus() { |  | ||||||
|         return { |  | ||||||
|             inProgress: this.indexRebuildInProgress, |  | ||||||
|             progress: this.indexRebuildProgress, |  | ||||||
|             total: this.indexRebuildTotal, |  | ||||||
|             current: this.indexRebuildCurrent |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Run a batch indexing job for a limited number of notes |  | ||||||
|      * @param batchSize - Maximum number of notes to process |  | ||||||
|      */ |  | ||||||
|     async runBatchIndexing(batchSize = 20) { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (this.indexingInProgress) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             this.indexingInProgress = true; |  | ||||||
|  |  | ||||||
|             // Check if this instance should process embeddings |  | ||||||
|             const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|             const isSyncServer = await this.isSyncServerForEmbeddings(); |  | ||||||
|             const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer; |  | ||||||
|  |  | ||||||
|             if (!shouldProcessEmbeddings) { |  | ||||||
|                 // This instance is not configured to process embeddings |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Process the embedding queue (batch size is controlled by embeddingBatchSize option) |  | ||||||
|             await vectorStore.processEmbeddingQueue(); |  | ||||||
|  |  | ||||||
|             return true; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error in batch indexing: ${error.message || "Unknown error"}`); |  | ||||||
|             return false; |  | ||||||
|         } finally { |  | ||||||
|             this.indexingInProgress = false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get the current indexing statistics |  | ||||||
|      */ |  | ||||||
|     async getIndexingStats() { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const stats = await vectorStore.getEmbeddingStats(); |  | ||||||
|  |  | ||||||
|             return { |  | ||||||
|                 ...stats, |  | ||||||
|                 isIndexing: this.indexingInProgress, |  | ||||||
|                 automaticIndexingEnabled: !!this.automaticIndexingInterval |  | ||||||
|             }; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error getting indexing stats: ${error.message || "Unknown error"}`); |  | ||||||
|             return { |  | ||||||
|                 totalNotesCount: 0, |  | ||||||
|                 embeddedNotesCount: 0, |  | ||||||
|                 queuedNotesCount: 0, |  | ||||||
|                 failedNotesCount: 0, |  | ||||||
|                 percentComplete: 0, |  | ||||||
|                 isIndexing: this.indexingInProgress, |  | ||||||
|                 automaticIndexingEnabled: !!this.automaticIndexingInterval, |  | ||||||
|                 error: error.message || "Unknown error" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get information about failed embedding attempts |  | ||||||
|      */ |  | ||||||
|     async getFailedIndexes(limit = 100) { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await vectorStore.getFailedEmbeddingNotes(limit); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error getting failed indexes: ${error.message || "Unknown error"}`); |  | ||||||
|             return []; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Retry indexing a specific note that previously failed |  | ||||||
|      */ |  | ||||||
|     async retryFailedNote(noteId: string) { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             return await vectorStore.retryFailedEmbedding(noteId); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error retrying failed note ${noteId}: ${error.message || "Unknown error"}`); |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Retry all failed indexing operations |  | ||||||
|      */ |  | ||||||
|     async retryAllFailedNotes() { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const count = await vectorStore.retryAllFailedEmbeddings(); |  | ||||||
|             log.info(`Queued ${count} failed notes for retry`); |  | ||||||
|             return count; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error retrying all failed notes: ${error.message || "Unknown error"}`); |  | ||||||
|             return 0; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
    /**
     * Find semantically similar notes to a given query.
     *
     * Embeds the query with the selected (or first enabled) embedding provider,
     * then either compares it against every note in the branch rooted at
     * `contextNoteId`, or delegates to the vector store for a global search.
     *
     * @param query - Text query to find similar notes for
     * @param contextNoteId - Optional note ID to restrict search to a branch
     * @param limit - Maximum number of results to return
     * @returns matches with similarity >= defaultSimilarityThreshold; empty array on error
     */
    async findSimilarNotes(
        query: string,
        contextNoteId?: string,
        limit = 10
    ) {
        if (!this.initialized) {
            await this.initialize();
        }

        try {
            // Get the selected embedding provider on-demand; fall back to the
            // first enabled provider when none is explicitly selected.
            const selectedEmbeddingProvider = await options.getOption('embeddingSelectedProvider');
            const provider = selectedEmbeddingProvider
                ? await providerManager.getOrCreateEmbeddingProvider(selectedEmbeddingProvider)
                : (await providerManager.getEnabledEmbeddingProviders())[0];

            if (!provider) {
                throw new Error("No embedding provider available");
            }

            log.info(`Searching with embedding provider: ${provider.name}, model: ${provider.getConfig().model}`);

            // Generate embedding for the query
            const embedding = await provider.generateEmbeddings(query);
            log.info(`Generated embedding for query: "${query}" (${embedding.length} dimensions)`);

            // Add the original query as a property to the embedding.
            // This is used for title matching in the vector search.
            // NOTE(review): enumerable: true means this extra property may show up
            // when the embedding is iterated/serialized — confirm that is intended.
            Object.defineProperty(embedding, 'originalQuery', {
                value: query,
                writable: false,
                enumerable: true,
                configurable: false
            });

            // Store query text in a global cache for possible regeneration with different providers
            // Use a type declaration to avoid TypeScript errors
            interface CustomGlobal {
                recentEmbeddingQueries?: Record<string, string>;
            }
            const globalWithCache = global as unknown as CustomGlobal;

            if (!globalWithCache.recentEmbeddingQueries) {
                globalWithCache.recentEmbeddingQueries = {};
            }

            // Use a substring of the embedding as a key (full embedding is too large)
            const embeddingKey = embedding.toString().substring(0, 100);
            globalWithCache.recentEmbeddingQueries[embeddingKey] = query;

            // Limit cache size to prevent memory leaks (keep max 50 recent queries).
            // NOTE(review): only one entry is evicted per insertion, so the cache can
            // momentarily exceed 50 if entries were added elsewhere — verify acceptable.
            const keys = Object.keys(globalWithCache.recentEmbeddingQueries);
            if (keys.length > 50) {
                delete globalWithCache.recentEmbeddingQueries[keys[0]];
            }

            // Get Note IDs to search, optionally filtered by branch
            let similarNotes: { noteId: string; title: string; similarity: number; contentType?: string }[] = [];

            // Check if we need to restrict search to a specific branch
            if (contextNoteId) {
                const note = becca.getNote(contextNoteId);
                if (!note) {
                    throw new Error(`Context note ${contextNoteId} not found`);
                }

                // Collect all note IDs in the subtree via depth-first traversal;
                // the visited set guards against cycles in the note graph.
                const branchNoteIds = new Set<string>();
                const collectNoteIds = (noteId: string) => {
                    branchNoteIds.add(noteId);
                    const note = becca.getNote(noteId);
                    if (note) {
                        for (const childNote of note.getChildNotes()) {
                            if (!branchNoteIds.has(childNote.noteId)) {
                                collectNoteIds(childNote.noteId);
                            }
                        }
                    }
                };

                collectNoteIds(contextNoteId);

                // Get embeddings for all notes in the branch
                const config = provider.getConfig();

                // Import the ContentType detection from vector utils
                const { ContentType, detectContentType, cosineSimilarity } = await import('./embeddings/vector_utils.js');

                for (const noteId of branchNoteIds) {
                    const noteEmbedding = await vectorStore.getEmbeddingForNote(
                        noteId,
                        provider.name,
                        config.model
                    );

                    if (noteEmbedding) {
                        // Get the note to determine its content type
                        const note = becca.getNote(noteId);
                        if (note) {
                            // Detect content type from mime type (no content sample available here)
                            const contentType = detectContentType(note.mime, '');

                            // Use content-aware similarity calculation
                            const similarity = cosineSimilarity(
                                embedding,
                                noteEmbedding.embedding,
                                true, // normalize
                                config.model, // source model
                                // NOTE(review): providerId is passed where a target model
                                // appears to be expected — confirm against cosineSimilarity's signature
                                noteEmbedding.providerId, // target model (use providerId)
                                contentType, // content type for padding strategy
                                undefined // use default BALANCED performance profile
                            );

                            if (similarity >= this.defaultSimilarityThreshold) {
                                similarNotes.push({
                                    noteId,
                                    title: note.title,
                                    similarity,
                                    contentType: contentType.toString()
                                });
                            }
                        }
                    }
                }

                // Sort by similarity and return top results
                return similarNotes
                    .sort((a, b) => b.similarity - a.similarity)
                    .slice(0, limit);
            } else {
                // Search across all notes via the vector store
                const config = provider.getConfig();
                const results = await vectorStore.findSimilarNotes(
                    embedding,
                    provider.name,
                    config.model,
                    limit,
                    this.defaultSimilarityThreshold
                );

                // Enhance results with note titles
                similarNotes = results.map(result => {
                    const note = becca.getNote(result.noteId);
                    return {
                        noteId: result.noteId,
                        title: note ? note.title : 'Unknown Note',
                        similarity: result.similarity,
                        contentType: result.contentType
                    };
                });

                return similarNotes;
            }
        } catch (error: any) {
            log.error(`Error finding similar notes: ${error.message || "Unknown error"}`);
            return [];
        }
    }
|  |  | ||||||
    /**
     * Generate context for an LLM query based on relevance to the user's question.
     *
     * Builds a markdown-style context string from the most similar notes,
     * with increasing detail at higher depths:
     *   depth >= 2 adds note content (summarized when longer than 2000 chars),
     *   depth >= 3 adds up to 5 child note titles,
     *   depth >= 4 adds non-system owned attributes.
     *
     * @param query - The user's question
     * @param contextNoteId - Optional ID of a note to use as context root
     * @param depth - Depth of context to include (1-4)
     * @returns the assembled context string, or a fallback message on error /
     *     missing provider / no matches
     */
    async generateQueryContext(
        query: string,
        contextNoteId?: string,
        depth = 2
    ) {
        if (!this.initialized) {
            await this.initialize();
        }

        try {
            // Get embedding providers on-demand; without one we cannot search.
            const providers = await providerManager.getEnabledEmbeddingProviders();
            if (providers.length === 0) {
                return "I don't have access to your note embeddings. Please configure an embedding provider in your AI settings.";
            }

            // Find similar notes to the query, capped by the configured per-query limit
            const similarNotes = await this.findSimilarNotes(
                query,
                contextNoteId,
                this.maxNotesPerQuery
            );

            if (similarNotes.length === 0) {
                return CONTEXT_PROMPTS.INDEX_NO_NOTES_CONTEXT;
            }

            // Build context from the similar notes
            let context = `I found some relevant information in your notes that may help answer: "${query}"\n\n`;

            for (const note of similarNotes) {
                // Skip notes that disappeared between search and rendering
                const noteObj = becca.getNote(note.noteId);
                if (!noteObj) continue;

                context += `## ${noteObj.title}\n`;

                // Add parent context for better understanding
                const parents = noteObj.getParentNotes();
                if (parents.length > 0) {
                    context += `Path: ${parents.map(p => p.title).join(' > ')}\n`;
                }

                // Add content based on depth
                if (depth >= 2) {
                    const content = await this.contextExtractor.getNoteContent(note.noteId);
                    if (content) {
                        // For larger content (over 2000 chars), use a summary instead
                        if (content.length > 2000) {
                            const summary = await this.contextExtractor.summarizeContent(content, noteObj.title);
                            context += `${summary}\n[Content summarized due to length]\n\n`;
                        } else {
                            context += `${content}\n\n`;
                        }
                    }
                }

                // Add child note titles for more context if depth >= 3
                if (depth >= 3) {
                    const childNotes = noteObj.getChildNotes();
                    if (childNotes.length > 0) {
                        // List at most 5 children, then a count of the remainder
                        context += `Child notes: ${childNotes.slice(0, 5).map(n => n.title).join(', ')}`;
                        if (childNotes.length > 5) {
                            context += ` and ${childNotes.length - 5} more`;
                        }
                        context += `\n\n`;
                    }
                }

                // Add attribute context for even deeper understanding if depth >= 4
                if (depth >= 4) {
                    const attributes = noteObj.getOwnedAttributes();
                    if (attributes.length > 0) {
                        // Exclude internal/system attributes from the rendered list
                        const relevantAttrs = attributes.filter(a =>
                            !a.name.startsWith('_') && !a.name.startsWith('child:') && !a.name.startsWith('relation:')
                        );

                        if (relevantAttrs.length > 0) {
                            // Render labels as #name[=value] and relations as ~name[=value]
                            context += `Attributes: ${relevantAttrs.map(a =>
                                `${a.type === 'label' ? '#' : '~'}${a.name}${a.value ? '=' + a.value : ''}`
                            ).join(', ')}\n\n`;
                        }
                    }
                }
            }

            // Add instructions about how to reference the notes
            context += "When referring to information from these notes in your response, please cite them by their titles " +
                      "(e.g., \"According to your note on [Title]...\"). If the information doesn't contain what you need, " +
                      "just say so and use your general knowledge instead.";

            return context;
        } catch (error: any) {
            log.error(`Error generating query context: ${error.message || "Unknown error"}`);
            return "I'm an AI assistant helping with your Trilium notes. I encountered an error while retrieving context from your notes, but I'll try to assist based on general knowledge.";
        }
    }
|  |  | ||||||
|     /** |  | ||||||
|      * Check if this instance is a sync server and should generate embeddings |  | ||||||
|      */ |  | ||||||
|     async isSyncServerForEmbeddings() { |  | ||||||
|         // Check if this is a sync server (no syncServerHost means this is a sync server) |  | ||||||
|         const syncServerHost = await options.getOption('syncServerHost'); |  | ||||||
|         const isSyncServer = !syncServerHost; |  | ||||||
|  |  | ||||||
|         // Check if embedding generation should happen on the sync server |  | ||||||
|         const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|         const shouldGenerateOnSyncServer = embeddingLocation === 'sync_server'; |  | ||||||
|  |  | ||||||
|         return isSyncServer && shouldGenerateOnSyncServer; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Generate a comprehensive index entry for a note |  | ||||||
|      * This prepares all metadata and contexts for optimal LLM retrieval |  | ||||||
|      */ |  | ||||||
|     async generateNoteIndex(noteId: string) { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const note = becca.getNote(noteId); |  | ||||||
|             if (!note) { |  | ||||||
|                 throw new Error(`Note ${noteId} not found`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check if this note is excluded from AI features |  | ||||||
|             if (isNoteExcludedFromAI(note)) { |  | ||||||
|                 log.info(`Note ${noteId} (${note.title}) excluded from AI indexing due to exclusion label`); |  | ||||||
|                 return true; // Return true to indicate successful handling (exclusion is intentional) |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check where embedding generation should happen |  | ||||||
|             const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|  |  | ||||||
|             // If embedding generation should happen on the sync server and we're not the sync server, |  | ||||||
|             // just queue the note for embedding but don't generate it |  | ||||||
|             const isSyncServer = await this.isSyncServerForEmbeddings(); |  | ||||||
|             const shouldSkipGeneration = embeddingLocation === 'sync_server' && !isSyncServer; |  | ||||||
|  |  | ||||||
|             if (shouldSkipGeneration) { |  | ||||||
|                 // We're not the sync server, so just queue the note for embedding |  | ||||||
|                 // The sync server will handle the actual embedding generation |  | ||||||
|                 log.info(`Note ${noteId} queued for embedding generation on sync server`); |  | ||||||
|                 return true; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get complete note context for indexing |  | ||||||
|             const context = await vectorStore.getNoteEmbeddingContext(noteId); |  | ||||||
|  |  | ||||||
|             // Generate embedding with the selected provider |  | ||||||
|             const selectedEmbeddingProvider = await options.getOption('embeddingSelectedProvider'); |  | ||||||
|             const provider = selectedEmbeddingProvider |  | ||||||
|                 ? await providerManager.getOrCreateEmbeddingProvider(selectedEmbeddingProvider) |  | ||||||
|                 : (await providerManager.getEnabledEmbeddingProviders())[0]; |  | ||||||
|  |  | ||||||
|             if (provider) { |  | ||||||
|                 try { |  | ||||||
|                     const embedding = await provider.generateNoteEmbeddings(context); |  | ||||||
|                     if (embedding) { |  | ||||||
|                         const config = provider.getConfig(); |  | ||||||
|                         await vectorStore.storeNoteEmbedding( |  | ||||||
|                             noteId, |  | ||||||
|                             provider.name, |  | ||||||
|                             config.model, |  | ||||||
|                             embedding |  | ||||||
|                         ); |  | ||||||
|                     } |  | ||||||
|                 } catch (error: any) { |  | ||||||
|                     log.error(`Error generating embedding with provider ${provider.name} for note ${noteId}: ${error.message || "Unknown error"}`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return true; |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error generating note index for ${noteId}: ${error.message || "Unknown error"}`); |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Start embedding generation (called when AI is enabled) |  | ||||||
|      */ |  | ||||||
|     async startEmbeddingGeneration() { |  | ||||||
|         try { |  | ||||||
|             log.info("Starting embedding generation system"); |  | ||||||
|  |  | ||||||
|             const aiEnabled = options.getOptionOrNull('aiEnabled') === "true"; |  | ||||||
|             if (!aiEnabled) { |  | ||||||
|                 log.error("Cannot start embedding generation - AI features are disabled"); |  | ||||||
|                 throw new Error("AI features must be enabled first"); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Re-initialize if needed |  | ||||||
|             if (!this.initialized) { |  | ||||||
|                 await this.initialize(); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check if this instance should process embeddings |  | ||||||
|             const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; |  | ||||||
|             const isSyncServer = await this.isSyncServerForEmbeddings(); |  | ||||||
|             const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer; |  | ||||||
|  |  | ||||||
|             if (!shouldProcessEmbeddings) { |  | ||||||
|                 log.info("This instance is not configured to process embeddings"); |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Get embedding providers (will be created on-demand when needed) |  | ||||||
|             const providers = await providerManager.getEnabledEmbeddingProviders(); |  | ||||||
|             if (providers.length === 0) { |  | ||||||
|                 log.info("No embedding providers configured, but continuing initialization"); |  | ||||||
|             } else { |  | ||||||
|                 log.info(`Found ${providers.length} embedding providers: ${providers.map(p => p.name).join(', ')}`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Setup automatic indexing if enabled |  | ||||||
|             if (await options.getOptionBool('embeddingAutoUpdateEnabled')) { |  | ||||||
|                 this.setupAutomaticIndexing(); |  | ||||||
|                 log.info(`Automatic embedding indexing started ${isSyncServer ? 'as sync server' : 'as client'}`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Start background processing of the embedding queue |  | ||||||
|             const { setupEmbeddingBackgroundProcessing } = await import('./embeddings/events.js'); |  | ||||||
|             await setupEmbeddingBackgroundProcessing(); |  | ||||||
|  |  | ||||||
|             // Re-initialize event listeners |  | ||||||
|             this.setupEventListeners(); |  | ||||||
|  |  | ||||||
|             // Queue notes that don't have embeddings for current providers |  | ||||||
|             await this.queueNotesForMissingEmbeddings(); |  | ||||||
|  |  | ||||||
|             // Start processing the queue immediately |  | ||||||
|             await this.runBatchIndexing(20); |  | ||||||
|  |  | ||||||
|             log.info("Embedding generation started successfully"); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error starting embedding generation: ${error.message || "Unknown error"}`); |  | ||||||
|             throw error; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Queue notes that don't have embeddings for current provider settings |  | ||||||
|      */ |  | ||||||
|     async queueNotesForMissingEmbeddings() { |  | ||||||
|         try { |  | ||||||
|             // Wait for becca to be fully loaded before accessing notes |  | ||||||
|             await beccaLoader.beccaLoaded; |  | ||||||
|  |  | ||||||
|             // Get all non-deleted notes |  | ||||||
|             const allNotes = Object.values(becca.notes).filter(note => !note.isDeleted); |  | ||||||
|  |  | ||||||
|             // Get enabled providers |  | ||||||
|             const providers = await providerManager.getEnabledEmbeddingProviders(); |  | ||||||
|             if (providers.length === 0) { |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             let queuedCount = 0; |  | ||||||
|             let excludedCount = 0; |  | ||||||
|  |  | ||||||
|             // Process notes in batches to avoid overwhelming the system |  | ||||||
|             const batchSize = 100; |  | ||||||
|             for (let i = 0; i < allNotes.length; i += batchSize) { |  | ||||||
|                 const batch = allNotes.slice(i, i + batchSize); |  | ||||||
|  |  | ||||||
|                 for (const note of batch) { |  | ||||||
|                     try { |  | ||||||
|                         // Skip notes excluded from AI |  | ||||||
|                         if (isNoteExcludedFromAI(note)) { |  | ||||||
|                             excludedCount++; |  | ||||||
|                             continue; |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         // Check if note needs embeddings for any enabled provider |  | ||||||
|                         let needsEmbedding = false; |  | ||||||
|  |  | ||||||
|                         for (const provider of providers) { |  | ||||||
|                             const config = provider.getConfig(); |  | ||||||
|                             const existingEmbedding = await vectorStore.getEmbeddingForNote( |  | ||||||
|                                 note.noteId, |  | ||||||
|                                 provider.name, |  | ||||||
|                                 config.model |  | ||||||
|                             ); |  | ||||||
|  |  | ||||||
|                             if (!existingEmbedding) { |  | ||||||
|                                 needsEmbedding = true; |  | ||||||
|                                 break; |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         if (needsEmbedding) { |  | ||||||
|                             await vectorStore.queueNoteForEmbedding(note.noteId, 'UPDATE'); |  | ||||||
|                             queuedCount++; |  | ||||||
|                         } |  | ||||||
|                     } catch (error: any) { |  | ||||||
|                         log.error(`Error checking embeddings for note ${note.noteId}: ${error.message || 'Unknown error'}`); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error queuing notes for missing embeddings: ${error.message || 'Unknown error'}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Reprocess all notes to update embeddings |  | ||||||
|      */ |  | ||||||
|     async reprocessAllNotes() { |  | ||||||
|         if (!this.initialized) { |  | ||||||
|             await this.initialize(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Get all non-deleted note IDs |  | ||||||
|             const noteIds = await sql.getColumn("SELECT noteId FROM notes WHERE isDeleted = 0"); |  | ||||||
|  |  | ||||||
|             // Process each note ID |  | ||||||
|             for (const noteId of noteIds) { |  | ||||||
|                 await vectorStore.queueNoteForEmbedding(noteId as string, 'UPDATE'); |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error reprocessing all notes: ${error.message || 'Unknown error'}`); |  | ||||||
|             throw error; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Stop embedding generation (called when AI is disabled) |  | ||||||
|      */ |  | ||||||
|     async stopEmbeddingGeneration() { |  | ||||||
|         try { |  | ||||||
|             log.info("Stopping embedding generation system"); |  | ||||||
|  |  | ||||||
|             // Clear automatic indexing interval |  | ||||||
|             if (this.automaticIndexingInterval) { |  | ||||||
|                 clearInterval(this.automaticIndexingInterval); |  | ||||||
|                 this.automaticIndexingInterval = undefined; |  | ||||||
|                 log.info("Automatic indexing stopped"); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Stop the background processing from embeddings/events.ts |  | ||||||
|             const { stopEmbeddingBackgroundProcessing } = await import('./embeddings/events.js'); |  | ||||||
|             stopEmbeddingBackgroundProcessing(); |  | ||||||
|  |  | ||||||
|             // Clear all embedding providers to clean up resources |  | ||||||
|             providerManager.clearAllEmbeddingProviders(); |  | ||||||
|  |  | ||||||
|             // Mark as not indexing |  | ||||||
|             this.indexingInProgress = false; |  | ||||||
|             this.indexRebuildInProgress = false; |  | ||||||
|  |  | ||||||
|             log.info("Embedding generation stopped successfully"); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             log.error(`Error stopping embedding generation: ${error.message || "Unknown error"}`); |  | ||||||
|             throw error; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Create singleton instance |  | ||||||
| const indexService = new IndexService(); |  | ||||||
| export default indexService; |  | ||||||
| @@ -1,5 +1,10 @@ | |||||||
| import type { ChatResponse } from '../ai_interface.js'; | import type { ChatResponse } from '../ai_interface.js'; | ||||||
| import type { VectorSearchResult } from '../context_extractors/vector_search_tool.js'; | // VectorSearchResult type definition moved here since vector search tool was removed | ||||||
|  | export interface VectorSearchResult { | ||||||
|  |     searchResults: Array<any>; | ||||||
|  |     totalResults: number; | ||||||
|  |     executionTime: number; | ||||||
|  | } | ||||||
| import type { NoteInfo, NotePathInfo, NoteHierarchyLevel } from '../context_extractors/note_navigator_tool.js'; | import type { NoteInfo, NotePathInfo, NoteHierarchyLevel } from '../context_extractors/note_navigator_tool.js'; | ||||||
| import type { DecomposedQuery, SubQuery } from '../context_extractors/query_decomposition_tool.js'; | import type { DecomposedQuery, SubQuery } from '../context_extractors/query_decomposition_tool.js'; | ||||||
| import type { ThinkingProcess, ThinkingStep } from '../context_extractors/contextual_thinking_tool.js'; | import type { ThinkingProcess, ThinkingStep } from '../context_extractors/contextual_thinking_tool.js'; | ||||||
|   | |||||||
| @@ -7,7 +7,6 @@ export interface ProviderMetadata { | |||||||
|   name: string; |   name: string; | ||||||
|   capabilities: { |   capabilities: { | ||||||
|     chat: boolean; |     chat: boolean; | ||||||
|     embeddings: boolean; |  | ||||||
|     streaming: boolean; |     streaming: boolean; | ||||||
|     functionCalling?: boolean; |     functionCalling?: boolean; | ||||||
|   }; |   }; | ||||||
|   | |||||||
| @@ -21,13 +21,6 @@ export interface ModelConfig { | |||||||
|     capabilities?: ModelCapabilities; |     capabilities?: ModelCapabilities; | ||||||
| } | } | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Embedding provider precedence configuration |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingProviderPrecedenceConfig { |  | ||||||
|     providers: EmbeddingProviderType[]; |  | ||||||
|     defaultProvider?: EmbeddingProviderType; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Model capabilities |  * Model capabilities | ||||||
| @@ -47,7 +40,6 @@ export interface ModelCapabilities { | |||||||
| export interface AIConfig { | export interface AIConfig { | ||||||
|     enabled: boolean; |     enabled: boolean; | ||||||
|     selectedProvider: ProviderType | null; |     selectedProvider: ProviderType | null; | ||||||
|     selectedEmbeddingProvider: EmbeddingProviderType | null; |  | ||||||
|     defaultModels: Record<ProviderType, string | undefined>; |     defaultModels: Record<ProviderType, string | undefined>; | ||||||
|     providerSettings: ProviderSettings; |     providerSettings: ProviderSettings; | ||||||
| } | } | ||||||
| @@ -84,10 +76,6 @@ export interface OllamaSettings { | |||||||
|  */ |  */ | ||||||
| export type ProviderType = 'openai' | 'anthropic' | 'ollama'; | export type ProviderType = 'openai' | 'anthropic' | 'ollama'; | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Valid embedding provider types |  | ||||||
|  */ |  | ||||||
| export type EmbeddingProviderType = 'openai' | 'voyage' | 'ollama' | 'local'; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Model identifier with provider prefix (e.g., "openai:gpt-4" or "ollama:llama2") |  * Model identifier with provider prefix (e.g., "openai:gpt-4" or "ollama:llama2") | ||||||
|   | |||||||
| @@ -1,108 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Interface for embedding provider configuration |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingProviderConfig { |  | ||||||
|   name: string; |  | ||||||
|   model: string; |  | ||||||
|   dimension: number; |  | ||||||
|   type: 'float32' | 'int8' | 'uint8' | 'float16'; |  | ||||||
|   enabled?: boolean; |  | ||||||
|   priority?: number; |  | ||||||
|   baseUrl?: string; |  | ||||||
|   apiKey?: string; |  | ||||||
|   contextWidth?: number; |  | ||||||
|   batchSize?: number; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding model information |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingModelInfo { |  | ||||||
|   name: string; |  | ||||||
|   dimension: number; |  | ||||||
|   contextWidth?: number; |  | ||||||
|   maxBatchSize?: number; |  | ||||||
|   tokenizer?: string; |  | ||||||
|   type: 'float32' | 'int8' | 'uint8' | 'float16'; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding provider |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingProvider { |  | ||||||
|   getName(): string; |  | ||||||
|   getModel(): string; |  | ||||||
|   getDimension(): number; |  | ||||||
|   getType(): 'float32' | 'int8' | 'uint8' | 'float16'; |  | ||||||
|   isEnabled(): boolean; |  | ||||||
|   getPriority(): number; |  | ||||||
|   getMaxBatchSize(): number; |  | ||||||
|   generateEmbedding(text: string): Promise<Float32Array>; |  | ||||||
|   generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]>; |  | ||||||
|   initialize(): Promise<void>; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding process result |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingProcessResult { |  | ||||||
|   noteId: string; |  | ||||||
|   title: string; |  | ||||||
|   success: boolean; |  | ||||||
|   message?: string; |  | ||||||
|   error?: Error; |  | ||||||
|   chunks?: number; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding queue item |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingQueueItem { |  | ||||||
|   id: number; |  | ||||||
|   noteId: string; |  | ||||||
|   status: 'pending' | 'processing' | 'completed' | 'failed' | 'retrying'; |  | ||||||
|   provider: string; |  | ||||||
|   model: string; |  | ||||||
|   dimension: number; |  | ||||||
|   type: string; |  | ||||||
|   attempts: number; |  | ||||||
|   lastAttempt: string | null; |  | ||||||
|   dateCreated: string; |  | ||||||
|   dateCompleted: string | null; |  | ||||||
|   error: string | null; |  | ||||||
|   chunks: number; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding batch processing |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingBatch { |  | ||||||
|   texts: string[]; |  | ||||||
|   noteIds: string[]; |  | ||||||
|   indexes: number[]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding search result |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingSearchResult { |  | ||||||
|   noteId: string; |  | ||||||
|   similarity: number; |  | ||||||
|   title?: string; |  | ||||||
|   content?: string; |  | ||||||
|   parentId?: string; |  | ||||||
|   parentTitle?: string; |  | ||||||
|   dateCreated?: string; |  | ||||||
|   dateModified?: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for embedding chunk |  | ||||||
|  */ |  | ||||||
| export interface EmbeddingChunk { |  | ||||||
|   id: number; |  | ||||||
|   noteId: string; |  | ||||||
|   content: string; |  | ||||||
|   embedding: Float32Array | Int8Array | Uint8Array; |  | ||||||
|   metadata?: Record<string, unknown>; |  | ||||||
| } |  | ||||||
| @@ -1,159 +1,85 @@ | |||||||
| import log from '../log.js'; | import log from '../log.js'; | ||||||
| import type { ModelCapabilities } from './interfaces/model_capabilities.js'; | import type { ModelCapabilities } from './interfaces/model_capabilities.js'; | ||||||
| import { MODEL_CAPABILITIES, DEFAULT_MODEL_CAPABILITIES } from './interfaces/model_capabilities.js'; | import { DEFAULT_MODEL_CAPABILITIES } from './interfaces/model_capabilities.js'; | ||||||
|  | import { MODEL_CAPABILITIES } from './constants/search_constants.js'; | ||||||
| import aiServiceManager from './ai_service_manager.js'; | import aiServiceManager from './ai_service_manager.js'; | ||||||
| import { getEmbeddingProvider } from './providers/providers.js'; |  | ||||||
| import type { BaseEmbeddingProvider } from './embeddings/base_embeddings.js'; |  | ||||||
| import type { EmbeddingModelInfo } from './interfaces/embedding_interfaces.js'; |  | ||||||
|  |  | ||||||
| // Define a type for embedding providers that might have the getModelInfo method |  | ||||||
| interface EmbeddingProviderWithModelInfo { |  | ||||||
|     getModelInfo?: (modelName: string) => Promise<EmbeddingModelInfo>; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Service for fetching and caching model capabilities |  * Service for fetching and caching model capabilities | ||||||
|  |  * Simplified to only handle chat models since embeddings have been removed | ||||||
|  */ |  */ | ||||||
| export class ModelCapabilitiesService { | export class ModelCapabilitiesService { | ||||||
|     // Cache model capabilities |     // Cache model capabilities | ||||||
|     private capabilitiesCache: Map<string, ModelCapabilities> = new Map(); |     private capabilitiesCache: Map<string, ModelCapabilities> = new Map(); | ||||||
|  |  | ||||||
|     constructor() { |  | ||||||
|         // Initialize cache with known models |  | ||||||
|         this.initializeCache(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Initialize the cache with known model capabilities |      * Get capabilities for a chat model | ||||||
|      */ |      */ | ||||||
|     private initializeCache() { |     async getChatModelCapabilities(modelName: string): Promise<ModelCapabilities> { | ||||||
|         // Add all predefined model capabilities to cache |  | ||||||
|         for (const [model, capabilities] of Object.entries(MODEL_CAPABILITIES)) { |  | ||||||
|             this.capabilitiesCache.set(model, { |  | ||||||
|                 ...DEFAULT_MODEL_CAPABILITIES, |  | ||||||
|                 ...capabilities |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get model capabilities, fetching from provider if needed |  | ||||||
|      * |  | ||||||
|      * @param modelName Full model name (with or without provider prefix) |  | ||||||
|      * @returns Model capabilities |  | ||||||
|      */ |  | ||||||
|     async getModelCapabilities(modelName: string): Promise<ModelCapabilities> { |  | ||||||
|         // Handle provider-prefixed model names (e.g., "openai:gpt-4") |  | ||||||
|         let provider = 'default'; |  | ||||||
|         let baseModelName = modelName; |  | ||||||
|  |  | ||||||
|         if (modelName.includes(':')) { |  | ||||||
|             const parts = modelName.split(':'); |  | ||||||
|             provider = parts[0]; |  | ||||||
|             baseModelName = parts[1]; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check cache first |         // Check cache first | ||||||
|         const cacheKey = baseModelName; |         const cached = this.capabilitiesCache.get(`chat:${modelName}`); | ||||||
|         if (this.capabilitiesCache.has(cacheKey)) { |         if (cached) { | ||||||
|             return this.capabilitiesCache.get(cacheKey)!; |             return cached; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Fetch from provider if possible |         // Get from static definitions or service | ||||||
|         try { |         const capabilities = await this.fetchChatModelCapabilities(modelName); | ||||||
|             // Get provider service |          | ||||||
|             const providerService = aiServiceManager.getService(provider); |         // Cache the result | ||||||
|  |         this.capabilitiesCache.set(`chat:${modelName}`, capabilities); | ||||||
|             if (providerService && typeof (providerService as any).getModelCapabilities === 'function') { |          | ||||||
|                 // If provider supports direct capability fetching, use it |         return capabilities; | ||||||
|                 const capabilities = await (providerService as any).getModelCapabilities(baseModelName); |  | ||||||
|  |  | ||||||
|                 if (capabilities) { |  | ||||||
|                     // Merge with defaults and cache |  | ||||||
|                     const fullCapabilities = { |  | ||||||
|                         ...DEFAULT_MODEL_CAPABILITIES, |  | ||||||
|                         ...capabilities |  | ||||||
|                     }; |  | ||||||
|  |  | ||||||
|                     this.capabilitiesCache.set(cacheKey, fullCapabilities); |  | ||||||
|                     log.info(`Fetched capabilities for ${modelName}: context window ${fullCapabilities.contextWindowTokens} tokens`); |  | ||||||
|  |  | ||||||
|                     return fullCapabilities; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Try to fetch from embedding provider if available |  | ||||||
|             const embeddingProvider = getEmbeddingProvider(provider); |  | ||||||
|  |  | ||||||
|             if (embeddingProvider) { |  | ||||||
|                 try { |  | ||||||
|                     // Cast to a type that might have getModelInfo method |  | ||||||
|                     const providerWithModelInfo = embeddingProvider as unknown as EmbeddingProviderWithModelInfo; |  | ||||||
|  |  | ||||||
|                     if (providerWithModelInfo.getModelInfo) { |  | ||||||
|                         const modelInfo = await providerWithModelInfo.getModelInfo(baseModelName); |  | ||||||
|  |  | ||||||
|                         if (modelInfo && modelInfo.contextWidth) { |  | ||||||
|                             // Convert to our capabilities format |  | ||||||
|                             const capabilities: ModelCapabilities = { |  | ||||||
|                                 ...DEFAULT_MODEL_CAPABILITIES, |  | ||||||
|                                 contextWindowTokens: modelInfo.contextWidth, |  | ||||||
|                                 contextWindowChars: modelInfo.contextWidth * 4 // Rough estimate: 4 chars per token |  | ||||||
|                             }; |  | ||||||
|  |  | ||||||
|                             this.capabilitiesCache.set(cacheKey, capabilities); |  | ||||||
|                             log.info(`Derived capabilities for ${modelName} from embedding provider: context window ${capabilities.contextWindowTokens} tokens`); |  | ||||||
|  |  | ||||||
|                             return capabilities; |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } catch (error) { |  | ||||||
|                     log.info(`Could not get model info from embedding provider for ${modelName}: ${error}`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error fetching model capabilities for ${modelName}: ${error}`); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If we get here, try to find a similar model in our predefined list |  | ||||||
|         for (const knownModel of Object.keys(MODEL_CAPABILITIES)) { |  | ||||||
|             // Check if the model name contains this known model (e.g., "gpt-4-1106-preview" contains "gpt-4") |  | ||||||
|             if (baseModelName.includes(knownModel)) { |  | ||||||
|                 const capabilities = { |  | ||||||
|                     ...DEFAULT_MODEL_CAPABILITIES, |  | ||||||
|                     ...MODEL_CAPABILITIES[knownModel] |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 this.capabilitiesCache.set(cacheKey, capabilities); |  | ||||||
|                 log.info(`Using similar model (${knownModel}) capabilities for ${modelName}`); |  | ||||||
|  |  | ||||||
|                 return capabilities; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Fall back to defaults if nothing else works |  | ||||||
|         log.info(`Using default capabilities for unknown model ${modelName}`); |  | ||||||
|         this.capabilitiesCache.set(cacheKey, DEFAULT_MODEL_CAPABILITIES); |  | ||||||
|  |  | ||||||
|         return DEFAULT_MODEL_CAPABILITIES; |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Update model capabilities in the cache |      * Fetch chat model capabilities from AI service or static definitions | ||||||
|      * |  | ||||||
|      * @param modelName Model name |  | ||||||
|      * @param capabilities Capabilities to update |  | ||||||
|      */ |      */ | ||||||
|     updateModelCapabilities(modelName: string, capabilities: Partial<ModelCapabilities>) { |     private async fetchChatModelCapabilities(modelName: string): Promise<ModelCapabilities> { | ||||||
|         const currentCapabilities = this.capabilitiesCache.get(modelName) || DEFAULT_MODEL_CAPABILITIES; |         try { | ||||||
|  |             // Try to get from static definitions first | ||||||
|  |             const staticCapabilities = MODEL_CAPABILITIES[modelName.toLowerCase()]; | ||||||
|  |             if (staticCapabilities) { | ||||||
|  |                 log.info(`Using static capabilities for chat model: ${modelName}`); | ||||||
|  |                 // Merge partial capabilities with defaults | ||||||
|  |                 return { | ||||||
|  |                     ...DEFAULT_MODEL_CAPABILITIES, | ||||||
|  |                     ...staticCapabilities | ||||||
|  |                 }; | ||||||
|  |             } | ||||||
|  |  | ||||||
|         this.capabilitiesCache.set(modelName, { |             // AI service doesn't have getModelCapabilities method | ||||||
|             ...currentCapabilities, |             // Use default capabilities instead | ||||||
|             ...capabilities |             log.info(`AI service doesn't support model capabilities - using defaults for model: ${modelName}`); | ||||||
|         }); |  | ||||||
|  |             // Fallback to default capabilities | ||||||
|  |             log.info(`Using default capabilities for chat model: ${modelName}`); | ||||||
|  |             return DEFAULT_MODEL_CAPABILITIES; | ||||||
|  |         } catch (error) { | ||||||
|  |             log.error(`Error fetching capabilities for chat model ${modelName}: ${error}`); | ||||||
|  |             return DEFAULT_MODEL_CAPABILITIES; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Clear capabilities cache | ||||||
|  |      */ | ||||||
|  |     clearCache(): void { | ||||||
|  |         this.capabilitiesCache.clear(); | ||||||
|  |         log.info('Model capabilities cache cleared'); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Get all cached capabilities | ||||||
|  |      */ | ||||||
|  |     getCachedCapabilities(): Record<string, ModelCapabilities> { | ||||||
|  |         const result: Record<string, ModelCapabilities> = {}; | ||||||
|  |         for (const [key, value] of this.capabilitiesCache.entries()) { | ||||||
|  |             result[key] = value; | ||||||
|  |         } | ||||||
|  |         return result; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| // Create and export singleton instance | // Export singleton instance | ||||||
| const modelCapabilitiesService = new ModelCapabilitiesService(); | export const modelCapabilitiesService = new ModelCapabilitiesService(); | ||||||
| export default modelCapabilitiesService; | export default modelCapabilitiesService; | ||||||
| @@ -8,7 +8,7 @@ import { ModelSelectionStage } from './stages/model_selection_stage.js'; | |||||||
| import { LLMCompletionStage } from './stages/llm_completion_stage.js'; | import { LLMCompletionStage } from './stages/llm_completion_stage.js'; | ||||||
| import { ResponseProcessingStage } from './stages/response_processing_stage.js'; | import { ResponseProcessingStage } from './stages/response_processing_stage.js'; | ||||||
| import { ToolCallingStage } from './stages/tool_calling_stage.js'; | import { ToolCallingStage } from './stages/tool_calling_stage.js'; | ||||||
| import { VectorSearchStage } from './stages/vector_search_stage.js'; | // VectorSearchStage removed along with embedding functionality | ||||||
| import toolRegistry from '../tools/tool_registry.js'; | import toolRegistry from '../tools/tool_registry.js'; | ||||||
| import toolInitializer from '../tools/tool_initializer.js'; | import toolInitializer from '../tools/tool_initializer.js'; | ||||||
| import log from '../../log.js'; | import log from '../../log.js'; | ||||||
| @@ -29,7 +29,7 @@ export class ChatPipeline { | |||||||
|         llmCompletion: LLMCompletionStage; |         llmCompletion: LLMCompletionStage; | ||||||
|         responseProcessing: ResponseProcessingStage; |         responseProcessing: ResponseProcessingStage; | ||||||
|         toolCalling: ToolCallingStage; |         toolCalling: ToolCallingStage; | ||||||
|         vectorSearch: VectorSearchStage; |         // vectorSearch removed with embedding functionality | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     config: ChatPipelineConfig; |     config: ChatPipelineConfig; | ||||||
| @@ -50,7 +50,7 @@ export class ChatPipeline { | |||||||
|             llmCompletion: new LLMCompletionStage(), |             llmCompletion: new LLMCompletionStage(), | ||||||
|             responseProcessing: new ResponseProcessingStage(), |             responseProcessing: new ResponseProcessingStage(), | ||||||
|             toolCalling: new ToolCallingStage(), |             toolCalling: new ToolCallingStage(), | ||||||
|             vectorSearch: new VectorSearchStage() |             // vectorSearch removed with embedding functionality | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         // Set default configuration values |         // Set default configuration values | ||||||
| @@ -198,27 +198,20 @@ export class ChatPipeline { | |||||||
|                 log.info('No LLM service available for query decomposition, using original query'); |                 log.info('No LLM service available for query decomposition, using original query'); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // STAGE 3: Execute vector similarity search with decomposed queries |             // STAGE 3: Vector search has been removed - skip semantic search | ||||||
|             const vectorSearchStartTime = Date.now(); |             const vectorSearchStartTime = Date.now(); | ||||||
|             log.info(`========== STAGE 3: VECTOR SEARCH ==========`); |             log.info(`========== STAGE 3: VECTOR SEARCH (DISABLED) ==========`); | ||||||
|             log.info('Using VectorSearchStage pipeline component to find relevant notes'); |             log.info('Vector search has been removed - LLM will rely on tool calls for context'); | ||||||
|             log.info(`Searching with ${searchQueries.length} queries from decomposition`); |  | ||||||
|  |  | ||||||
|             // Use the vectorSearchStage with multiple queries |             // Create empty vector search result since vector search is disabled | ||||||
|             const vectorSearchResult = await this.stages.vectorSearch.execute({ |             const vectorSearchResult = { | ||||||
|                 query: userQuery, // Original query as fallback |                 searchResults: [], | ||||||
|                 queries: searchQueries, // All decomposed queries |                 totalResults: 0, | ||||||
|                 noteId: input.noteId || 'global', |                 executionTime: Date.now() - vectorSearchStartTime | ||||||
|                 options: { |             }; | ||||||
|                     maxResults: SEARCH_CONSTANTS.CONTEXT.MAX_SIMILAR_NOTES, |  | ||||||
|                     useEnhancedQueries: false, // We're already using enhanced queries from decomposition |  | ||||||
|                     threshold: SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_THRESHOLD, |  | ||||||
|                     llmService: llmService || undefined |  | ||||||
|                 } |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             this.updateStageMetrics('vectorSearch', vectorSearchStartTime); |             this.updateStageMetrics('vectorSearch', vectorSearchStartTime); | ||||||
|             log.info(`Vector search found ${vectorSearchResult.searchResults.length} relevant notes across ${searchQueries.length} queries`); |             log.info(`Vector search disabled - using tool-based context extraction instead`); | ||||||
|  |  | ||||||
|             // Extract context from search results |             // Extract context from search results | ||||||
|             log.info(`========== SEMANTIC CONTEXT EXTRACTION ==========`); |             log.info(`========== SEMANTIC CONTEXT EXTRACTION ==========`); | ||||||
|   | |||||||
| @@ -1,70 +1,27 @@ | |||||||
| import { BasePipelineStage } from '../pipeline_stage.js'; | import { BasePipelineStage } from '../pipeline_stage.js'; | ||||||
| import type { SemanticContextExtractionInput } from '../interfaces.js'; | import type { SemanticContextExtractionInput } from '../interfaces.js'; | ||||||
| import aiServiceManager from '../../ai_service_manager.js'; |  | ||||||
| import log from '../../../log.js'; | import log from '../../../log.js'; | ||||||
| import { VectorSearchStage } from './vector_search_stage.js'; |  | ||||||
| import contextFormatter from '../../context/modules/context_formatter.js'; |  | ||||||
| import providerManager from '../../context/modules/provider_manager.js'; |  | ||||||
| import type { NoteSearchResult } from '../../interfaces/context_interfaces.js'; |  | ||||||
| import type { Message } from '../../ai_interface.js'; |  | ||||||
| import { SEARCH_CONSTANTS } from "../../constants/search_constants.js"; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Pipeline stage for extracting semantic context from notes |  * Pipeline stage for extracting semantic context from notes | ||||||
|  * This uses the new VectorSearchStage to find relevant content |  * Since vector search has been removed, this now returns empty context | ||||||
|  |  * and relies on other context extraction methods | ||||||
|  */ |  */ | ||||||
| export class SemanticContextExtractionStage extends BasePipelineStage<SemanticContextExtractionInput, { context: string }> { | export class SemanticContextExtractionStage extends BasePipelineStage<SemanticContextExtractionInput, { context: string }> { | ||||||
|     private vectorSearchStage: VectorSearchStage; |  | ||||||
|  |  | ||||||
|     constructor() { |     constructor() { | ||||||
|         super('SemanticContextExtraction'); |         super('SemanticContextExtraction'); | ||||||
|         this.vectorSearchStage = new VectorSearchStage(); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Extract semantic context based on a query |      * Extract semantic context based on a query | ||||||
|  |      * Returns empty context since vector search has been removed | ||||||
|      */ |      */ | ||||||
|     protected async process(input: SemanticContextExtractionInput): Promise<{ context: string }> { |     protected async process(input: SemanticContextExtractionInput): Promise<{ context: string }> { | ||||||
|         const { noteId, query, maxResults = 5, messages = [] } = input; |         const { noteId, query } = input; | ||||||
|         log.info(`Extracting semantic context from note ${noteId}, query: ${query?.substring(0, 50)}...`); |         log.info(`Semantic context extraction disabled - vector search has been removed. Using tool-based context instead for note ${noteId}`); | ||||||
|  |  | ||||||
|         try { |         // Return empty context since we no longer use vector search | ||||||
|             // Step 1: Use vector search stage to find relevant notes |         // The LLM will rely on tool calls for context gathering | ||||||
|             const vectorSearchResult = await this.vectorSearchStage.execute({ |         return { context: "" }; | ||||||
|                 query, |  | ||||||
|                 noteId, |  | ||||||
|                 options: { |  | ||||||
|                     maxResults, |  | ||||||
|                     useEnhancedQueries: true, |  | ||||||
|                     threshold: SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_THRESHOLD, |  | ||||||
|                     llmService: undefined // Let the vectorSearchStage use the default service |  | ||||||
|                 } |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             log.info(`Vector search found ${vectorSearchResult.searchResults.length} relevant notes`); |  | ||||||
|  |  | ||||||
|             // If no results, return empty context |  | ||||||
|             if (vectorSearchResult.searchResults.length === 0) { |  | ||||||
|                 log.info(`No relevant notes found for context extraction`); |  | ||||||
|                 return { context: "" }; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Step 2: Format search results into a context string |  | ||||||
|             const provider = await providerManager.getSelectedEmbeddingProvider(); |  | ||||||
|             const providerId = provider?.name || 'default'; |  | ||||||
|  |  | ||||||
|             const context = await contextFormatter.buildContextFromNotes( |  | ||||||
|                 vectorSearchResult.searchResults, |  | ||||||
|                 query, |  | ||||||
|                 providerId, |  | ||||||
|                 messages |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             log.info(`Built context of ${context.length} chars from ${vectorSearchResult.searchResults.length} notes`); |  | ||||||
|             return { context }; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error extracting semantic context: ${error}`); |  | ||||||
|             return { context: "" }; |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -37,11 +37,7 @@ interface ToolValidationResult { | |||||||
| export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { response: ChatResponse, needsFollowUp: boolean, messages: Message[] }> { | export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { response: ChatResponse, needsFollowUp: boolean, messages: Message[] }> { | ||||||
|     constructor() { |     constructor() { | ||||||
|         super('ToolCalling'); |         super('ToolCalling'); | ||||||
|  |         // Vector search tool has been removed - no preloading needed | ||||||
|         // Preload the vectorSearchTool to ensure it's available when needed |  | ||||||
|         this.preloadVectorSearchTool().catch(error => { |  | ||||||
|             log.error(`Error preloading vector search tool: ${error.message}`); |  | ||||||
|         }); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
| @@ -498,13 +494,13 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|  |  | ||||||
|             let directiveMessage = `YOU MUST NOT GIVE UP AFTER A SINGLE EMPTY SEARCH RESULT. `; |             let directiveMessage = `YOU MUST NOT GIVE UP AFTER A SINGLE EMPTY SEARCH RESULT. `; | ||||||
|  |  | ||||||
|             if (emptyToolNames.includes('search_notes') || emptyToolNames.includes('vector_search')) { |             if (emptyToolNames.includes('search_notes') || emptyToolNames.includes('keyword_search')) { | ||||||
|                 directiveMessage += `IMMEDIATELY RUN ANOTHER SEARCH TOOL with broader search terms, alternative keywords, or related concepts. `; |                 directiveMessage += `IMMEDIATELY RUN ANOTHER SEARCH TOOL with broader search terms, alternative keywords, or related concepts. `; | ||||||
|                 directiveMessage += `Try synonyms, more general terms, or related topics. `; |                 directiveMessage += `Try synonyms, more general terms, or related topics. `; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             if (emptyToolNames.includes('keyword_search')) { |             if (emptyToolNames.includes('keyword_search')) { | ||||||
|                 directiveMessage += `IMMEDIATELY TRY VECTOR_SEARCH INSTEAD as it might find semantic matches where keyword search failed. `; |                 directiveMessage += `IMMEDIATELY TRY SEARCH_NOTES INSTEAD as it might find matches where keyword search failed. `; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             directiveMessage += `DO NOT ask the user what to do next or if they want general information. CONTINUE SEARCHING with different parameters.`; |             directiveMessage += `DO NOT ask the user what to do next or if they want general information. CONTINUE SEARCHING with different parameters.`; | ||||||
| @@ -530,71 +526,6 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get or create a dependency required by tools |  | ||||||
|      * |  | ||||||
|      * @param dependencyType The type of dependency to get or create |  | ||||||
|      * @param toolName The name of the tool requiring this dependency |  | ||||||
|      * @returns The requested dependency or null if it couldn't be created |  | ||||||
|      */ |  | ||||||
|     private async getOrCreateDependency(dependencyType: string, toolName: string): Promise<unknown | null> { |  | ||||||
|         const aiServiceManager = (await import('../../ai_service_manager.js')).default; |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             log.info(`Getting dependency '${dependencyType}' for tool '${toolName}'`); |  | ||||||
|  |  | ||||||
|             // Check for specific dependency types |  | ||||||
|             if (dependencyType === 'vectorSearchTool') { |  | ||||||
|                 // Try to get the existing vector search tool |  | ||||||
|                 let vectorSearchTool = aiServiceManager.getVectorSearchTool(); |  | ||||||
|  |  | ||||||
|                 if (vectorSearchTool) { |  | ||||||
|                     log.info(`Found existing vectorSearchTool dependency`); |  | ||||||
|                     return vectorSearchTool; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // No existing tool, try to initialize it |  | ||||||
|                 log.info(`Dependency '${dependencyType}' not found, attempting initialization`); |  | ||||||
|  |  | ||||||
|                 // Get agent tools manager and initialize it |  | ||||||
|                 const agentTools = aiServiceManager.getAgentTools(); |  | ||||||
|                 if (agentTools && typeof agentTools.initialize === 'function') { |  | ||||||
|                     try { |  | ||||||
|                         // Force initialization to ensure it runs even if previously marked as initialized |  | ||||||
|                         await agentTools.initialize(true); |  | ||||||
|                     } catch (initError: unknown) { |  | ||||||
|                         const errorMessage = initError instanceof Error ? initError.message : String(initError); |  | ||||||
|                         log.error(`Failed to initialize agent tools: ${errorMessage}`); |  | ||||||
|                         return null; |  | ||||||
|                     } |  | ||||||
|                 } else { |  | ||||||
|                     log.error('Agent tools manager not available'); |  | ||||||
|                     return null; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Try getting the vector search tool again after initialization |  | ||||||
|                 vectorSearchTool = aiServiceManager.getVectorSearchTool(); |  | ||||||
|  |  | ||||||
|                 if (vectorSearchTool) { |  | ||||||
|                     log.info('Successfully created vectorSearchTool dependency'); |  | ||||||
|                     return vectorSearchTool; |  | ||||||
|                 } else { |  | ||||||
|                     log.error('Failed to create vectorSearchTool dependency after initialization'); |  | ||||||
|                     return null; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Add more dependency types as needed |  | ||||||
|  |  | ||||||
|             // Unknown dependency type |  | ||||||
|             log.error(`Unknown dependency type: ${dependencyType}`); |  | ||||||
|             return null; |  | ||||||
|         } catch (error: unknown) { |  | ||||||
|             const errorMessage = error instanceof Error ? error.message : String(error); |  | ||||||
|             log.error(`Error getting or creating dependency '${dependencyType}': ${errorMessage}`); |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Validate a tool before execution |      * Validate a tool before execution | ||||||
| @@ -614,50 +545,9 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // For the search_notes tool specifically, check if vectorSearchTool is available |             // search_notes tool now uses context handler instead of vector search | ||||||
|             if (toolName === 'search_notes') { |             if (toolName === 'search_notes') { | ||||||
|                 try { |                 log.info(`Tool '${toolName}' validated - uses context handler instead of vector search`); | ||||||
|                     // Use the imported aiServiceManager instead of dynamic import |  | ||||||
|                     let vectorSearchTool = aiServiceManager.getVectorSearchTool(); |  | ||||||
|  |  | ||||||
|                     if (!vectorSearchTool) { |  | ||||||
|                         log.error(`Tool '${toolName}' is missing dependency: vectorSearchTool - attempting to initialize`); |  | ||||||
|  |  | ||||||
|                         // Try to initialize the agent tools |  | ||||||
|                         try { |  | ||||||
|                             // Get agent tools manager and initialize it if needed |  | ||||||
|                             const agentTools = aiServiceManager.getAgentTools(); |  | ||||||
|                             if (agentTools && typeof agentTools.initialize === 'function') { |  | ||||||
|                                 log.info('Attempting to initialize agent tools'); |  | ||||||
|                                 // Force initialization to ensure it runs even if previously initialized |  | ||||||
|                                 await agentTools.initialize(true); |  | ||||||
|                             } |  | ||||||
|  |  | ||||||
|                             // Try getting the vector search tool again |  | ||||||
|                             vectorSearchTool = aiServiceManager.getVectorSearchTool(); |  | ||||||
|  |  | ||||||
|                             if (!vectorSearchTool) { |  | ||||||
|                                 log.error('Unable to initialize vectorSearchTool after initialization attempt'); |  | ||||||
|                                 return false; |  | ||||||
|                             } |  | ||||||
|                             log.info('Successfully initialized vectorSearchTool'); |  | ||||||
|                         } catch (initError: unknown) { |  | ||||||
|                             const errorMessage = initError instanceof Error ? initError.message : String(initError); |  | ||||||
|                             log.error(`Failed to initialize agent tools: ${errorMessage}`); |  | ||||||
|                             return false; |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Verify the vectorSearchTool has the required methods |  | ||||||
|                     if (!vectorSearchTool.searchNotes || typeof vectorSearchTool.searchNotes !== 'function') { |  | ||||||
|                         log.error(`Tool '${toolName}' dependency vectorSearchTool is missing searchNotes method`); |  | ||||||
|                         return false; |  | ||||||
|                     } |  | ||||||
|                 } catch (error: unknown) { |  | ||||||
|                     const errorMessage = error instanceof Error ? error.message : String(error); |  | ||||||
|                     log.error(`Error validating dependencies for tool '${toolName}': ${errorMessage}`); |  | ||||||
|                     return false; |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Add additional tool-specific validations here |             // Add additional tool-specific validations here | ||||||
| @@ -705,13 +595,13 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|             // Provide guidance on available search tools if a tool wasn't found |             // Provide guidance on available search tools if a tool wasn't found | ||||||
|             const searchTools = availableToolNames.filter(name => name.includes('search')); |             const searchTools = availableToolNames.filter(name => name.includes('search')); | ||||||
|             guidance += `AVAILABLE SEARCH TOOLS: ${searchTools.join(', ')}\n`; |             guidance += `AVAILABLE SEARCH TOOLS: ${searchTools.join(', ')}\n`; | ||||||
|             guidance += "TRY VECTOR SEARCH: For conceptual matches, use 'vector_search' with a query parameter.\n"; |             guidance += "TRY SEARCH NOTES: For semantic matches, use 'search_notes' with a query parameter.\n"; | ||||||
|             guidance += "EXAMPLE: { \"query\": \"your search terms here\" }\n"; |             guidance += "EXAMPLE: { \"query\": \"your search terms here\" }\n"; | ||||||
|         } |         } | ||||||
|         else if (errorMessage.includes('missing required parameter')) { |         else if (errorMessage.includes('missing required parameter')) { | ||||||
|             // Provide parameter guidance based on the tool name |             // Provide parameter guidance based on the tool name | ||||||
|             if (toolName === 'vector_search') { |             if (toolName === 'search_notes') { | ||||||
|                 guidance += "REQUIRED PARAMETERS: The 'vector_search' tool requires a 'query' parameter.\n"; |                 guidance += "REQUIRED PARAMETERS: The 'search_notes' tool requires a 'query' parameter.\n"; | ||||||
|                 guidance += "EXAMPLE: { \"query\": \"your search terms here\" }\n"; |                 guidance += "EXAMPLE: { \"query\": \"your search terms here\" }\n"; | ||||||
|             } else if (toolName === 'keyword_search') { |             } else if (toolName === 'keyword_search') { | ||||||
|                 guidance += "REQUIRED PARAMETERS: The 'keyword_search' tool requires a 'query' parameter.\n"; |                 guidance += "REQUIRED PARAMETERS: The 'keyword_search' tool requires a 'query' parameter.\n"; | ||||||
| @@ -719,9 +609,9 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Add a general suggestion to try vector_search as a fallback |         // Add a general suggestion to try search_notes as a fallback | ||||||
|         if (!toolName.includes('vector_search')) { |         if (!toolName.includes('search_notes')) { | ||||||
|             guidance += "RECOMMENDATION: If specific searches fail, try the 'vector_search' tool which performs semantic searches.\n"; |             guidance += "RECOMMENDATION: If specific searches fail, try the 'search_notes' tool which performs semantic searches.\n"; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         return guidance; |         return guidance; | ||||||
| @@ -751,11 +641,6 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             if (toolName === 'vector_search' && |  | ||||||
|                 (trimmed.includes('No results found') || |  | ||||||
|                  trimmed.includes('No matching documents'))) { |  | ||||||
|                 return true; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (toolName === 'keyword_search' && |             if (toolName === 'keyword_search' && | ||||||
|                 (trimmed.includes('No matches found') || |                 (trimmed.includes('No matches found') || | ||||||
| @@ -787,39 +672,10 @@ export class ToolCallingStage extends BasePipelineStage<ToolExecutionInput, { re | |||||||
|                     return true; |                     return true; | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 if (toolName === 'vector_search' && |  | ||||||
|                     'matches' in resultObj && |  | ||||||
|                     Array.isArray(resultObj.matches) && |  | ||||||
|                     resultObj.matches.length === 0) { |  | ||||||
|                     return true; |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Preload the vector search tool to ensure it's available before tool execution |  | ||||||
|      */ |  | ||||||
|     private async preloadVectorSearchTool(): Promise<void> { |  | ||||||
|         try { |  | ||||||
|             log.info(`Preloading vector search tool...`); |  | ||||||
|  |  | ||||||
|             // Get the agent tools and initialize them if needed |  | ||||||
|             const agentTools = aiServiceManager.getAgentTools(); |  | ||||||
|             if (agentTools && typeof agentTools.initialize === 'function') { |  | ||||||
|                 await agentTools.initialize(true); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check if the vector search tool is available |  | ||||||
|             const vectorSearchTool = aiServiceManager.getVectorSearchTool(); |  | ||||||
|             if (!(vectorSearchTool && typeof vectorSearchTool.searchNotes === 'function')) { |  | ||||||
|                 log.error(`Vector search tool not available after initialization`); |  | ||||||
|             } |  | ||||||
|         } catch (error: unknown) { |  | ||||||
|             const errorMessage = error instanceof Error ? error.message : String(error); |  | ||||||
|             log.error(`Failed to preload vector search tool: ${errorMessage}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,126 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Vector Search Stage |  | ||||||
|  * |  | ||||||
|  * Part of the chat pipeline that handles finding semantically relevant notes |  | ||||||
|  * using vector similarity search. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import log from '../../../log.js'; |  | ||||||
| import vectorSearchService from '../../context/services/vector_search_service.js'; |  | ||||||
| import type { NoteSearchResult } from '../../interfaces/context_interfaces.js'; |  | ||||||
| import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js'; |  | ||||||
| import { SEARCH_CONSTANTS } from '../../constants/search_constants.js'; |  | ||||||
|  |  | ||||||
| export interface VectorSearchInput { |  | ||||||
|   query: string; |  | ||||||
|   queries?: string[]; |  | ||||||
|   noteId?: string; |  | ||||||
|   options?: { |  | ||||||
|     maxResults?: number; |  | ||||||
|     threshold?: number; |  | ||||||
|     useEnhancedQueries?: boolean; |  | ||||||
|     llmService?: LLMServiceInterface; |  | ||||||
|   }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export interface VectorSearchOutput { |  | ||||||
|   searchResults: NoteSearchResult[]; |  | ||||||
|   originalQuery: string; |  | ||||||
|   noteId: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Pipeline stage for performing vector-based semantic search |  | ||||||
|  */ |  | ||||||
| export class VectorSearchStage { |  | ||||||
|   constructor() { |  | ||||||
|     log.info('VectorSearchStage initialized'); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Execute vector search to find relevant notes |  | ||||||
|    */ |  | ||||||
|   async execute(input: VectorSearchInput): Promise<VectorSearchOutput> { |  | ||||||
|     const { |  | ||||||
|       query, |  | ||||||
|       queries = [], |  | ||||||
|       noteId = 'global', |  | ||||||
|       options = {} |  | ||||||
|     } = input; |  | ||||||
|  |  | ||||||
|     const { |  | ||||||
|       maxResults = SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS, |  | ||||||
|       threshold = SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_THRESHOLD, |  | ||||||
|       useEnhancedQueries = false, |  | ||||||
|       llmService = undefined |  | ||||||
|     } = options; |  | ||||||
|  |  | ||||||
|     // If queries array is provided, use multi-query search |  | ||||||
|     if (queries && queries.length > 0) { |  | ||||||
|       log.info(`VectorSearchStage: Searching with ${queries.length} queries`); |  | ||||||
|       log.info(`Parameters: noteId=${noteId}, maxResults=${maxResults}, threshold=${threshold}`); |  | ||||||
|  |  | ||||||
|       try { |  | ||||||
|         // Use the new multi-query method |  | ||||||
|         const searchResults = await vectorSearchService.findRelevantNotesMultiQuery( |  | ||||||
|           queries, |  | ||||||
|           noteId === 'global' ? null : noteId, |  | ||||||
|           { |  | ||||||
|             maxResults, |  | ||||||
|             threshold, |  | ||||||
|             llmService: llmService || null |  | ||||||
|           } |  | ||||||
|         ); |  | ||||||
|  |  | ||||||
|         log.info(`VectorSearchStage: Found ${searchResults.length} relevant notes from multi-query search`); |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|           searchResults, |  | ||||||
|           originalQuery: query, |  | ||||||
|           noteId |  | ||||||
|         }; |  | ||||||
|       } catch (error) { |  | ||||||
|         log.error(`Error in vector search stage multi-query: ${error}`); |  | ||||||
|         // Return empty results on error |  | ||||||
|         return { |  | ||||||
|           searchResults: [], |  | ||||||
|           originalQuery: query, |  | ||||||
|           noteId |  | ||||||
|         }; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Fallback to single query search |  | ||||||
|     log.info(`VectorSearchStage: Searching for "${query.substring(0, 50)}..."`); |  | ||||||
|     log.info(`Parameters: noteId=${noteId}, maxResults=${maxResults}, threshold=${threshold}`); |  | ||||||
|  |  | ||||||
|     try { |  | ||||||
|       // Find relevant notes using vector search service |  | ||||||
|       const searchResults = await vectorSearchService.findRelevantNotes( |  | ||||||
|         query, |  | ||||||
|         noteId === 'global' ? null : noteId, |  | ||||||
|         { |  | ||||||
|           maxResults, |  | ||||||
|           threshold, |  | ||||||
|           llmService: llmService || null |  | ||||||
|         } |  | ||||||
|       ); |  | ||||||
|  |  | ||||||
|       log.info(`VectorSearchStage: Found ${searchResults.length} relevant notes`); |  | ||||||
|  |  | ||||||
|       return { |  | ||||||
|         searchResults, |  | ||||||
|         originalQuery: query, |  | ||||||
|         noteId |  | ||||||
|       }; |  | ||||||
|     } catch (error) { |  | ||||||
|       log.error(`Error in vector search stage: ${error}`); |  | ||||||
|       // Return empty results on error |  | ||||||
|       return { |  | ||||||
|         searchResults: [], |  | ||||||
|         originalQuery: query, |  | ||||||
|         noteId |  | ||||||
|       }; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| @@ -1,17 +1,15 @@ | |||||||
| /** | /** | ||||||
|  * Provider Validation Service |  * Provider Validation Service | ||||||
|  *  |  *  | ||||||
|  * Validates AI provider configurations before initializing the embedding system. |  * Validates AI provider configurations before initializing the chat system. | ||||||
|  * This prevents startup errors when AI is enabled but providers are misconfigured. |  * This prevents startup errors when AI is enabled but providers are misconfigured. | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| import log from "../log.js"; | import log from "../log.js"; | ||||||
| import options from "../options.js"; | import options from "../options.js"; | ||||||
| import type { EmbeddingProvider } from "./embeddings/embeddings_interface.js"; |  | ||||||
|  |  | ||||||
| export interface ProviderValidationResult { | export interface ProviderValidationResult { | ||||||
|     hasValidProviders: boolean; |     hasValidProviders: boolean; | ||||||
|     validEmbeddingProviders: EmbeddingProvider[]; |  | ||||||
|     validChatProviders: string[]; |     validChatProviders: string[]; | ||||||
|     errors: string[]; |     errors: string[]; | ||||||
|     warnings: string[]; |     warnings: string[]; | ||||||
| @@ -23,73 +21,35 @@ export interface ProviderValidationResult { | |||||||
| export async function validateProviders(): Promise<ProviderValidationResult> { | export async function validateProviders(): Promise<ProviderValidationResult> { | ||||||
|     const result: ProviderValidationResult = { |     const result: ProviderValidationResult = { | ||||||
|         hasValidProviders: false, |         hasValidProviders: false, | ||||||
|         validEmbeddingProviders: [], |  | ||||||
|         validChatProviders: [], |         validChatProviders: [], | ||||||
|         errors: [], |         errors: [], | ||||||
|         warnings: [] |         warnings: [] | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     try { |     log.info("Starting provider validation..."); | ||||||
|         // Check if AI is enabled |  | ||||||
|         const aiEnabled = await options.getOptionBool('aiEnabled'); |  | ||||||
|         if (!aiEnabled) { |  | ||||||
|             result.warnings.push("AI features are disabled"); |  | ||||||
|             return result; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check configuration only - don't create providers |     // Check if AI is enabled | ||||||
|         await checkEmbeddingProviderConfigs(result); |     const aiEnabled = await options.getOptionBool('aiEnabled'); | ||||||
|         await checkChatProviderConfigs(result); |     if (!aiEnabled) { | ||||||
|  |         log.info("AI is disabled, skipping provider validation"); | ||||||
|  |         return result; | ||||||
|  |     } | ||||||
|  |  | ||||||
|         // Determine if we have any valid providers based on configuration |     // Check chat provider configurations | ||||||
|         result.hasValidProviders = result.validChatProviders.length > 0; |     await checkChatProviderConfigs(result); | ||||||
|  |  | ||||||
|         if (!result.hasValidProviders) { |     // Update overall validation status | ||||||
|             result.errors.push("No valid AI providers are configured"); |     result.hasValidProviders = result.validChatProviders.length > 0; | ||||||
|         } |  | ||||||
|  |  | ||||||
|     } catch (error: any) { |     if (result.hasValidProviders) { | ||||||
|         result.errors.push(`Error during provider validation: ${error.message || 'Unknown error'}`); |         log.info(`Provider validation successful. Valid chat providers: ${result.validChatProviders.join(', ')}`); | ||||||
|  |     } else { | ||||||
|  |         log.info("No valid providers found"); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Check embedding provider configurations without creating providers |  | ||||||
|  */ |  | ||||||
| async function checkEmbeddingProviderConfigs(result: ProviderValidationResult): Promise<void> { |  | ||||||
|     try { |  | ||||||
|         // Check OpenAI embedding configuration |  | ||||||
|         const openaiApiKey = await options.getOption('openaiApiKey'); |  | ||||||
|         const openaiBaseUrl = await options.getOption('openaiBaseUrl'); |  | ||||||
|         if (openaiApiKey || openaiBaseUrl) { |  | ||||||
|             if (!openaiApiKey) { |  | ||||||
|                 result.warnings.push("OpenAI embedding: No API key (may work with compatible endpoints)"); |  | ||||||
|             } |  | ||||||
|             log.info("OpenAI embedding provider configuration available"); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check Ollama embedding configuration |  | ||||||
|         const ollamaEmbeddingBaseUrl = await options.getOption('ollamaEmbeddingBaseUrl'); |  | ||||||
|         if (ollamaEmbeddingBaseUrl) { |  | ||||||
|             log.info("Ollama embedding provider configuration available"); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check Voyage embedding configuration |  | ||||||
|         const voyageApiKey = await options.getOption('voyageApiKey' as any); |  | ||||||
|         if (voyageApiKey) { |  | ||||||
|             log.info("Voyage embedding provider configuration available"); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Local provider is always available |  | ||||||
|         log.info("Local embedding provider available as fallback"); |  | ||||||
|  |  | ||||||
|     } catch (error: any) { |  | ||||||
|         result.errors.push(`Error checking embedding provider configs: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Check chat provider configurations without creating providers |  * Check chat provider configurations without creating providers | ||||||
|  */ |  */ | ||||||
| @@ -100,26 +60,22 @@ async function checkChatProviderConfigs(result: ProviderValidationResult): Promi | |||||||
|         const openaiBaseUrl = await options.getOption('openaiBaseUrl'); |         const openaiBaseUrl = await options.getOption('openaiBaseUrl'); | ||||||
|          |          | ||||||
|         if (openaiApiKey || openaiBaseUrl) { |         if (openaiApiKey || openaiBaseUrl) { | ||||||
|             if (!openaiApiKey) { |  | ||||||
|                 result.warnings.push("OpenAI chat: No API key (may work with compatible endpoints)"); |  | ||||||
|             } |  | ||||||
|             result.validChatProviders.push('openai'); |             result.validChatProviders.push('openai'); | ||||||
|  |             log.info("OpenAI chat provider configuration available"); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Check Anthropic chat provider |         // Check Anthropic chat provider | ||||||
|         const anthropicApiKey = await options.getOption('anthropicApiKey'); |         const anthropicApiKey = await options.getOption('anthropicApiKey'); | ||||||
|         if (anthropicApiKey) { |         if (anthropicApiKey) { | ||||||
|             result.validChatProviders.push('anthropic'); |             result.validChatProviders.push('anthropic'); | ||||||
|  |             log.info("Anthropic chat provider configuration available"); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Check Ollama chat provider |         // Check Ollama chat provider | ||||||
|         const ollamaBaseUrl = await options.getOption('ollamaBaseUrl'); |         const ollamaBaseUrl = await options.getOption('ollamaBaseUrl'); | ||||||
|         if (ollamaBaseUrl) { |         if (ollamaBaseUrl) { | ||||||
|             result.validChatProviders.push('ollama'); |             result.validChatProviders.push('ollama'); | ||||||
|         } |             log.info("Ollama chat provider configuration available"); | ||||||
|  |  | ||||||
|         if (result.validChatProviders.length === 0) { |  | ||||||
|             result.warnings.push("No chat providers configured. Please configure at least one provider."); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     } catch (error: any) { |     } catch (error: any) { | ||||||
| @@ -127,51 +83,16 @@ async function checkChatProviderConfigs(result: ProviderValidationResult): Promi | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Check if any chat providers are configured |  * Check if we have at least one valid embedding provider available | ||||||
|  |  * Returns false since embeddings have been removed | ||||||
|  */ |  */ | ||||||
| export async function hasWorkingChatProviders(): Promise<boolean> { | export async function getEmbeddingProviderAvailability(): Promise<boolean> { | ||||||
|     const validation = await validateProviders(); |     log.info("Embedding providers have been removed, returning false"); | ||||||
|     return validation.validChatProviders.length > 0; |     return false; | ||||||
| } | } | ||||||
|  |  | ||||||
| /** | export default { | ||||||
|  * Check if any embedding providers are configured (simplified) |     validateProviders, | ||||||
|  */ |     getEmbeddingProviderAvailability | ||||||
| export async function hasWorkingEmbeddingProviders(): Promise<boolean> { | }; | ||||||
|     if (!(await options.getOptionBool('aiEnabled'))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Check if any embedding provider is configured |  | ||||||
|     const openaiKey = await options.getOption('openaiApiKey'); |  | ||||||
|     const openaiBaseUrl = await options.getOption('openaiBaseUrl'); |  | ||||||
|     const ollamaUrl = await options.getOption('ollamaEmbeddingBaseUrl'); |  | ||||||
|     const voyageKey = await options.getOption('voyageApiKey' as any); |  | ||||||
|      |  | ||||||
|     // Local provider is always available as fallback |  | ||||||
|     return !!(openaiKey || openaiBaseUrl || ollamaUrl || voyageKey) || true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Log validation results in a user-friendly way |  | ||||||
|  */ |  | ||||||
| export function logValidationResults(validation: ProviderValidationResult): void { |  | ||||||
|     if (validation.hasValidProviders) { |  | ||||||
|         log.info(`AI provider validation passed: ${validation.validEmbeddingProviders.length} embedding providers, ${validation.validChatProviders.length} chat providers`); |  | ||||||
|          |  | ||||||
|         if (validation.validEmbeddingProviders.length > 0) { |  | ||||||
|             log.info(`Working embedding providers: ${validation.validEmbeddingProviders.map(p => p.name).join(', ')}`); |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         if (validation.validChatProviders.length > 0) { |  | ||||||
|             log.info(`Working chat providers: ${validation.validChatProviders.join(', ')}`); |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         log.info("AI provider validation failed: No working providers found"); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     validation.warnings.forEach(warning => log.info(`Provider validation: ${warning}`)); |  | ||||||
|     validation.errors.forEach(error => log.error(`Provider validation: ${error}`)); |  | ||||||
| } |  | ||||||
| @@ -1,13 +1,5 @@ | |||||||
| import options from "../../options.js"; | import options from "../../options.js"; | ||||||
| import log from "../../log.js"; | import log from "../../log.js"; | ||||||
| import sql from "../../sql.js"; |  | ||||||
| import dateUtils from "../../date_utils.js"; |  | ||||||
| import { randomString } from "../../utils.js"; |  | ||||||
| import type { EmbeddingProvider, EmbeddingConfig } from "../embeddings/embeddings_interface.js"; |  | ||||||
| import { NormalizationStatus } from "../embeddings/embeddings_interface.js"; |  | ||||||
| import { OpenAIEmbeddingProvider } from "../embeddings/providers/openai.js"; |  | ||||||
| import { OllamaEmbeddingProvider } from "../embeddings/providers/ollama.js"; |  | ||||||
| import { VoyageEmbeddingProvider } from "../embeddings/providers/voyage.js"; |  | ||||||
| import type { OptionDefinitions } from "@triliumnext/commons"; | import type { OptionDefinitions } from "@triliumnext/commons"; | ||||||
| import type { ChatCompletionOptions } from '../ai_interface.js'; | import type { ChatCompletionOptions } from '../ai_interface.js'; | ||||||
| import type { OpenAIOptions, AnthropicOptions, OllamaOptions, ModelMetadata } from './provider_options.js'; | import type { OpenAIOptions, AnthropicOptions, OllamaOptions, ModelMetadata } from './provider_options.js'; | ||||||
| @@ -19,347 +11,6 @@ import { | |||||||
| import { PROVIDER_CONSTANTS } from '../constants/provider_constants.js'; | import { PROVIDER_CONSTANTS } from '../constants/provider_constants.js'; | ||||||
| import { SEARCH_CONSTANTS, MODEL_CAPABILITIES } from '../constants/search_constants.js'; | import { SEARCH_CONSTANTS, MODEL_CAPABILITIES } from '../constants/search_constants.js'; | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Simple local embedding provider implementation |  | ||||||
|  * This avoids the need to import a separate file which might not exist |  | ||||||
|  */ |  | ||||||
| class SimpleLocalEmbeddingProvider implements EmbeddingProvider { |  | ||||||
|     name = "local"; |  | ||||||
|     config: EmbeddingConfig; |  | ||||||
|  |  | ||||||
|     constructor(config: EmbeddingConfig) { |  | ||||||
|         this.config = config; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     getConfig(): EmbeddingConfig { |  | ||||||
|         return this.config; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Returns the normalization status of the local provider |  | ||||||
|      * Local provider does not guarantee normalization |  | ||||||
|      */ |  | ||||||
|     getNormalizationStatus(): NormalizationStatus { |  | ||||||
|         return NormalizationStatus.NEVER; // Simple embedding does not normalize vectors |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { |  | ||||||
|         // Create deterministic embeddings based on text content |  | ||||||
|         const result = new Float32Array(this.config.dimension || 384); |  | ||||||
|  |  | ||||||
|         // Simple hash-based approach |  | ||||||
|         for (let i = 0; i < result.length; i++) { |  | ||||||
|             // Use character codes and position to generate values between -1 and 1 |  | ||||||
|             const charSum = Array.from(text).reduce((sum, char, idx) => |  | ||||||
|                 sum + char.charCodeAt(0) * Math.sin(idx * 0.1), 0); |  | ||||||
|             result[i] = Math.sin(i * 0.1 + charSum * 0.01); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return result; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { |  | ||||||
|         return Promise.all(texts.map(text => this.generateEmbeddings(text))); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     async generateNoteEmbeddings(context: any): Promise<Float32Array> { |  | ||||||
|         // Combine text from context |  | ||||||
|         const text = (context.title || "") + " " + (context.content || ""); |  | ||||||
|         return this.generateEmbeddings(text); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     async generateBatchNoteEmbeddings(contexts: any[]): Promise<Float32Array[]> { |  | ||||||
|         return Promise.all(contexts.map(context => this.generateNoteEmbeddings(context))); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| const providers = new Map<string, EmbeddingProvider>(); |  | ||||||
|  |  | ||||||
| // Cache to track which provider errors have been logged |  | ||||||
| const loggedProviderErrors = new Set<string>(); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Register a new embedding provider |  | ||||||
|  */ |  | ||||||
| export function registerEmbeddingProvider(provider: EmbeddingProvider) { |  | ||||||
|     providers.set(provider.name, provider); |  | ||||||
|     log.info(`Registered embedding provider: ${provider.name}`); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Unregister an embedding provider |  | ||||||
|  */ |  | ||||||
| export function unregisterEmbeddingProvider(name: string): boolean { |  | ||||||
|     const existed = providers.has(name); |  | ||||||
|     if (existed) { |  | ||||||
|         providers.delete(name); |  | ||||||
|         log.info(`Unregistered embedding provider: ${name}`); |  | ||||||
|     } |  | ||||||
|     return existed; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Clear all embedding providers |  | ||||||
|  */ |  | ||||||
| export function clearAllEmbeddingProviders(): void { |  | ||||||
|     const providerNames = Array.from(providers.keys()); |  | ||||||
|     providers.clear(); |  | ||||||
|     if (providerNames.length > 0) { |  | ||||||
|         log.info(`Cleared all embedding providers: ${providerNames.join(', ')}`); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get all registered embedding providers |  | ||||||
|  */ |  | ||||||
| export function getEmbeddingProviders(): EmbeddingProvider[] { |  | ||||||
|     return Array.from(providers.values()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get a specific embedding provider by name |  | ||||||
|  */ |  | ||||||
| export function getEmbeddingProvider(name: string): EmbeddingProvider | undefined { |  | ||||||
|     return providers.get(name); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get or create a specific embedding provider with inline validation |  | ||||||
|  */ |  | ||||||
| export async function getOrCreateEmbeddingProvider(providerName: string): Promise<EmbeddingProvider | null> { |  | ||||||
|     // Return existing provider if already created and valid |  | ||||||
|     const existing = providers.get(providerName); |  | ||||||
|     if (existing) { |  | ||||||
|         return existing; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Create and validate provider on-demand |  | ||||||
|     try { |  | ||||||
|         let provider: EmbeddingProvider | null = null; |  | ||||||
|  |  | ||||||
|         switch (providerName) { |  | ||||||
|             case 'ollama': { |  | ||||||
|                 const baseUrl = await options.getOption('ollamaEmbeddingBaseUrl'); |  | ||||||
|                 if (!baseUrl) return null; |  | ||||||
|  |  | ||||||
|                 const model = await options.getOption('ollamaEmbeddingModel'); |  | ||||||
|                 provider = new OllamaEmbeddingProvider({ |  | ||||||
|                     model, |  | ||||||
|                     dimension: 768, |  | ||||||
|                     type: 'float32', |  | ||||||
|                     baseUrl |  | ||||||
|                 }); |  | ||||||
|  |  | ||||||
|                 // Validate by initializing (if provider supports it) |  | ||||||
|                 if ('initialize' in provider && typeof provider.initialize === 'function') { |  | ||||||
|                     await provider.initialize(); |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             case 'openai': { |  | ||||||
|                 const apiKey = await options.getOption('openaiApiKey'); |  | ||||||
|                 const baseUrl = await options.getOption('openaiBaseUrl'); |  | ||||||
|                 if (!apiKey && !baseUrl) return null; |  | ||||||
|  |  | ||||||
|                 const model = await options.getOption('openaiEmbeddingModel'); |  | ||||||
|                 provider = new OpenAIEmbeddingProvider({ |  | ||||||
|                     model, |  | ||||||
|                     dimension: 1536, |  | ||||||
|                     type: 'float32', |  | ||||||
|                     apiKey: apiKey || '', |  | ||||||
|                     baseUrl: baseUrl || 'https://api.openai.com/v1' |  | ||||||
|                 }); |  | ||||||
|  |  | ||||||
|                 if (!apiKey) { |  | ||||||
|                     log.info('OpenAI embedding provider created without API key for compatible endpoints'); |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             case 'voyage': { |  | ||||||
|                 const apiKey = await options.getOption('voyageApiKey' as any); |  | ||||||
|                 if (!apiKey) return null; |  | ||||||
|  |  | ||||||
|                 const model = await options.getOption('voyageEmbeddingModel') || 'voyage-2'; |  | ||||||
|                 provider = new VoyageEmbeddingProvider({ |  | ||||||
|                     model, |  | ||||||
|                     dimension: 1024, |  | ||||||
|                     type: 'float32', |  | ||||||
|                     apiKey, |  | ||||||
|                     baseUrl: 'https://api.voyageai.com/v1' |  | ||||||
|                 }); |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             case 'local': { |  | ||||||
|                 provider = new SimpleLocalEmbeddingProvider({ |  | ||||||
|                     model: 'local', |  | ||||||
|                     dimension: 384, |  | ||||||
|                     type: 'float32' |  | ||||||
|                 }); |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             default: |  | ||||||
|                 return null; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (provider) { |  | ||||||
|             registerEmbeddingProvider(provider); |  | ||||||
|             log.info(`Created and validated ${providerName} embedding provider`); |  | ||||||
|             return provider; |  | ||||||
|         } |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Failed to create ${providerName} embedding provider: ${error.message || 'Unknown error'}`); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return null; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get all enabled embedding providers for the specified feature |  | ||||||
|  */ |  | ||||||
| export async function getEnabledEmbeddingProviders(feature: 'embeddings' | 'chat' = 'embeddings'): Promise<EmbeddingProvider[]> { |  | ||||||
|     if (!(await options.getOptionBool('aiEnabled'))) { |  | ||||||
|         return []; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const result: EmbeddingProvider[] = []; |  | ||||||
|  |  | ||||||
|     // Get the selected provider for the feature |  | ||||||
|     const selectedProvider = feature === 'embeddings' |  | ||||||
|         ? await options.getOption('embeddingSelectedProvider') |  | ||||||
|         : await options.getOption('aiSelectedProvider'); |  | ||||||
|  |  | ||||||
|         // Try to get or create the specific selected provider |  | ||||||
|         const provider = await getOrCreateEmbeddingProvider(selectedProvider); |  | ||||||
|         if (!provider) { |  | ||||||
|             throw new Error(`Failed to create selected embedding provider: ${selectedProvider}. Please check your configuration.`); |  | ||||||
|         } |  | ||||||
|         result.push(provider); |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Always ensure local provider as fallback |  | ||||||
|     const localProvider = await getOrCreateEmbeddingProvider('local'); |  | ||||||
|     if (localProvider && !result.some(p => p.name === 'local')) { |  | ||||||
|         result.push(localProvider); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Create a new embedding provider configuration in the database |  | ||||||
|  */ |  | ||||||
| export async function createEmbeddingProviderConfig( |  | ||||||
|     name: string, |  | ||||||
|     config: EmbeddingConfig, |  | ||||||
|     priority = 0 |  | ||||||
| ): Promise<string> { |  | ||||||
|     const providerId = randomString(16); |  | ||||||
|     const now = dateUtils.localNowDateTime(); |  | ||||||
|     const utcNow = dateUtils.utcNowDateTime(); |  | ||||||
|  |  | ||||||
|     await sql.execute(` |  | ||||||
|         INSERT INTO embedding_providers |  | ||||||
|         (providerId, name, priority, config, |  | ||||||
|          dateCreated, utcDateCreated, dateModified, utcDateModified) |  | ||||||
|         VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, |  | ||||||
|         [providerId, name, priority, JSON.stringify(config), |  | ||||||
|             now, utcNow, now, utcNow] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return providerId; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Update an existing embedding provider configuration |  | ||||||
|  */ |  | ||||||
| export async function updateEmbeddingProviderConfig( |  | ||||||
|     providerId: string, |  | ||||||
|     priority?: number, |  | ||||||
|     config?: EmbeddingConfig |  | ||||||
| ): Promise<boolean> { |  | ||||||
|     const now = dateUtils.localNowDateTime(); |  | ||||||
|     const utcNow = dateUtils.utcNowDateTime(); |  | ||||||
|  |  | ||||||
|     // Get existing provider |  | ||||||
|     const provider = await sql.getRow( |  | ||||||
|         "SELECT * FROM embedding_providers WHERE providerId = ?", |  | ||||||
|         [providerId] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     if (!provider) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Build update query parts |  | ||||||
|     const updates: string[] = []; |  | ||||||
|     const params: any[] = []; |  | ||||||
|  |  | ||||||
|     if (priority !== undefined) { |  | ||||||
|         updates.push("priority = ?"); |  | ||||||
|         params.push(priority); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (config) { |  | ||||||
|         updates.push("config = ?"); |  | ||||||
|         params.push(JSON.stringify(config)); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (updates.length === 0) { |  | ||||||
|         return true; // Nothing to update |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     updates.push("dateModified = ?"); |  | ||||||
|     updates.push("utcDateModified = ?"); |  | ||||||
|     params.push(now, utcNow); |  | ||||||
|  |  | ||||||
|     params.push(providerId); |  | ||||||
|  |  | ||||||
|     // Execute update |  | ||||||
|     await sql.execute( |  | ||||||
|         `UPDATE embedding_providers SET ${updates.join(", ")} WHERE providerId = ?`, |  | ||||||
|         params |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Delete an embedding provider configuration |  | ||||||
|  */ |  | ||||||
| export async function deleteEmbeddingProviderConfig(providerId: string): Promise<boolean> { |  | ||||||
|     const result = await sql.execute( |  | ||||||
|         "DELETE FROM embedding_providers WHERE providerId = ?", |  | ||||||
|         [providerId] |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     return result.changes > 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get all embedding provider configurations from the database |  | ||||||
|  */ |  | ||||||
| export async function getEmbeddingProviderConfigs() { |  | ||||||
|     return await sql.getRows("SELECT * FROM embedding_providers ORDER BY priority DESC"); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export default { |  | ||||||
|     registerEmbeddingProvider, |  | ||||||
|     unregisterEmbeddingProvider, |  | ||||||
|     clearAllEmbeddingProviders, |  | ||||||
|     getEmbeddingProviders, |  | ||||||
|     getEmbeddingProvider, |  | ||||||
|     getEnabledEmbeddingProviders, |  | ||||||
|     getOrCreateEmbeddingProvider, |  | ||||||
|     createEmbeddingProviderConfig, |  | ||||||
|     updateEmbeddingProviderConfig, |  | ||||||
|     deleteEmbeddingProviderConfig, |  | ||||||
|     getEmbeddingProviderConfigs |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Get OpenAI provider options from chat options and configuration |  * Get OpenAI provider options from chat options and configuration | ||||||
|  * Updated to use provider metadata approach |  * Updated to use provider metadata approach | ||||||
| @@ -598,4 +249,4 @@ async function getOllamaModelContextWindow(modelName: string): Promise<number> { | |||||||
|         log.info(`Error getting context window for model ${modelName}: ${error}`); |         log.info(`Error getting context window for model ${modelName}: ${error}`); | ||||||
|         return MODEL_CAPABILITIES['default'].contextWindowTokens; // Default fallback |         return MODEL_CAPABILITIES['default'].contextWindowTokens; // Default fallback | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -130,8 +130,8 @@ export class NoteSummarizationTool implements ToolHandler { | |||||||
|                 { role: 'system', content: 'You are a skilled summarizer. Create concise, accurate summaries while preserving the key information.' }, |                 { role: 'system', content: 'You are a skilled summarizer. Create concise, accurate summaries while preserving the key information.' }, | ||||||
|                 { role: 'user', content: prompt } |                 { role: 'user', content: prompt } | ||||||
|             ], { |             ], { | ||||||
|                 temperature: SEARCH_CONSTANTS.TEMPERATURE.VECTOR_SEARCH, // Lower temperature for more focused summaries |                 temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR, // Lower temperature for more focused summaries | ||||||
|                 maxTokens: SEARCH_CONSTANTS.LIMITS.VECTOR_SEARCH_MAX_TOKENS // Enough tokens for the summary |                 maxTokens: SEARCH_CONSTANTS.LIMITS.DEFAULT_MAX_TOKENS // Enough tokens for the summary | ||||||
|             }); |             }); | ||||||
|  |  | ||||||
|             const summaryDuration = Date.now() - summaryStartTime; |             const summaryDuration = Date.now() - summaryStartTime; | ||||||
|   | |||||||
| @@ -10,7 +10,24 @@ import becca from '../../../becca/becca.js'; | |||||||
| import attributes from '../../attributes.js'; | import attributes from '../../attributes.js'; | ||||||
| import aiServiceManager from '../ai_service_manager.js'; | import aiServiceManager from '../ai_service_manager.js'; | ||||||
| import { SEARCH_CONSTANTS } from '../constants/search_constants.js'; | import { SEARCH_CONSTANTS } from '../constants/search_constants.js'; | ||||||
| import type { Backlink, RelatedNote } from '../embeddings/embeddings_interface.js'; | import searchService from '../../search/services/search.js'; | ||||||
|  | // Define types locally since embeddings are no longer available | ||||||
|  | interface Backlink { | ||||||
|  |     noteId: string; | ||||||
|  |     title: string; | ||||||
|  |     relationName: string; | ||||||
|  |     sourceNoteId: string; | ||||||
|  |     sourceTitle: string; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | interface RelatedNote { | ||||||
|  |     noteId: string; | ||||||
|  |     title: string; | ||||||
|  |     similarity: number; | ||||||
|  |     relationName: string; | ||||||
|  |     targetNoteId: string; | ||||||
|  |     targetTitle: string; | ||||||
|  | } | ||||||
|  |  | ||||||
| interface Suggestion { | interface Suggestion { | ||||||
|     targetNoteId: string; |     targetNoteId: string; | ||||||
| @@ -195,6 +212,9 @@ export class RelationshipTool implements ToolHandler { | |||||||
|  |  | ||||||
|                 if (targetNote) { |                 if (targetNote) { | ||||||
|                     outgoingRelations.push({ |                     outgoingRelations.push({ | ||||||
|  |                         noteId: targetNote.noteId, | ||||||
|  |                         title: targetNote.title, | ||||||
|  |                         similarity: 1.0, | ||||||
|                         relationName: attr.name, |                         relationName: attr.name, | ||||||
|                         targetNoteId: targetNote.noteId, |                         targetNoteId: targetNote.noteId, | ||||||
|                         targetTitle: targetNote.title |                         targetTitle: targetNote.title | ||||||
| @@ -215,6 +235,8 @@ export class RelationshipTool implements ToolHandler { | |||||||
|  |  | ||||||
|                     if (sourceOfRelation && !sourceOfRelation.isDeleted) { |                     if (sourceOfRelation && !sourceOfRelation.isDeleted) { | ||||||
|                         incomingRelations.push({ |                         incomingRelations.push({ | ||||||
|  |                             noteId: sourceOfRelation.noteId, | ||||||
|  |                             title: sourceOfRelation.title, | ||||||
|                             relationName: attr.name, |                             relationName: attr.name, | ||||||
|                             sourceNoteId: sourceOfRelation.noteId, |                             sourceNoteId: sourceOfRelation.noteId, | ||||||
|                             sourceTitle: sourceOfRelation.title |                             sourceTitle: sourceOfRelation.title | ||||||
| @@ -244,51 +266,87 @@ export class RelationshipTool implements ToolHandler { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Find related notes using vector similarity |      * Find related notes using TriliumNext's search service | ||||||
|      */ |      */ | ||||||
|     private async findRelatedNotes(sourceNote: any, limit: number): Promise<object> { |     private async findRelatedNotes(sourceNote: any, limit: number): Promise<object> { | ||||||
|         try { |         try { | ||||||
|             // Get the vector search tool from the AI service manager |             log.info(`Using TriliumNext search to find notes related to "${sourceNote.title}"`); | ||||||
|             const vectorSearchTool = aiServiceManager.getVectorSearchTool(); |  | ||||||
|  |  | ||||||
|             if (!vectorSearchTool) { |             // Get note content for search | ||||||
|                 log.error('Vector search tool not available'); |             const content = sourceNote.getContent(); | ||||||
|                 return { |  | ||||||
|                     success: false, |  | ||||||
|                     message: 'Vector search capability not available' |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             log.info(`Using vector search to find notes related to "${sourceNote.title}"`); |  | ||||||
|  |  | ||||||
|             // Get note content for semantic search |  | ||||||
|             const content = await sourceNote.getContent(); |  | ||||||
|             const title = sourceNote.title; |             const title = sourceNote.title; | ||||||
|  |  | ||||||
|             // Use both title and content for search |             // Create search queries from the note title and content | ||||||
|             const searchQuery = title + (content && typeof content === 'string' ? ': ' + content.substring(0, 500) : ''); |             const searchQueries = [title]; | ||||||
|  |              | ||||||
|  |             // Extract key terms from content if available | ||||||
|  |             if (content && typeof content === 'string') { | ||||||
|  |                 // Extract meaningful words from content (filter out common words) | ||||||
|  |                 const contentWords = content | ||||||
|  |                     .toLowerCase() | ||||||
|  |                     .split(/\s+/) | ||||||
|  |                     .filter(word => word.length > 3) | ||||||
|  |                     .filter(word => !/^(the|and|but|for|are|from|they|been|have|this|that|with|will|when|where|what|how)$/.test(word)) | ||||||
|  |                     .slice(0, 10); // Take first 10 meaningful words | ||||||
|  |                  | ||||||
|  |                 if (contentWords.length > 0) { | ||||||
|  |                     searchQueries.push(contentWords.join(' ')); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|             // Execute the search |             // Execute searches and combine results | ||||||
|             const searchStartTime = Date.now(); |             const searchStartTime = Date.now(); | ||||||
|             const results = await vectorSearchTool.searchNotes(searchQuery, { |             const allResults = new Map<string, any>(); | ||||||
|                 maxResults: limit + 1 // Add 1 to account for the source note itself |             let searchDuration = 0; | ||||||
|             }); |  | ||||||
|             const searchDuration = Date.now() - searchStartTime; |  | ||||||
|  |  | ||||||
|             // Filter out the source note from results |             for (const query of searchQueries) { | ||||||
|             const filteredResults = results.filter(note => note.noteId !== sourceNote.noteId); |                 try { | ||||||
|             log.info(`Found ${filteredResults.length} related notes in ${searchDuration}ms`); |                     const results = searchService.searchNotes(query, {  | ||||||
|  |                         includeArchivedNotes: false, | ||||||
|  |                         fastSearch: false // Use full search for better results | ||||||
|  |                     }); | ||||||
|  |                      | ||||||
|  |                     // Add results to our map (avoiding duplicates) | ||||||
|  |                     for (const note of results.slice(0, limit * 2)) { // Get more to account for duplicates | ||||||
|  |                         if (note.noteId !== sourceNote.noteId && !note.isDeleted) { | ||||||
|  |                             allResults.set(note.noteId, { | ||||||
|  |                                 noteId: note.noteId, | ||||||
|  |                                 title: note.title, | ||||||
|  |                                 similarity: 0.8 // Base similarity for search results | ||||||
|  |                             }); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } catch (error) { | ||||||
|  |                     log.error(`Search query failed: ${query} - ${error}`); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             searchDuration = Date.now() - searchStartTime; | ||||||
|  |  | ||||||
|  |             // Also add notes that are directly related via attributes | ||||||
|  |             const directlyRelatedNotes = this.getDirectlyRelatedNotes(sourceNote); | ||||||
|  |             for (const note of directlyRelatedNotes) { | ||||||
|  |                 if (!allResults.has(note.noteId)) { | ||||||
|  |                     allResults.set(note.noteId, { | ||||||
|  |                         noteId: note.noteId, | ||||||
|  |                         title: note.title, | ||||||
|  |                         similarity: 1.0 // Higher similarity for directly related notes | ||||||
|  |                     }); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             const relatedNotes = Array.from(allResults.values()) | ||||||
|  |                 .sort((a, b) => b.similarity - a.similarity) // Sort by similarity | ||||||
|  |                 .slice(0, limit); | ||||||
|  |  | ||||||
|  |             log.info(`Found ${relatedNotes.length} related notes in ${searchDuration}ms`); | ||||||
|  |  | ||||||
|             return { |             return { | ||||||
|                 success: true, |                 success: true, | ||||||
|                 noteId: sourceNote.noteId, |                 noteId: sourceNote.noteId, | ||||||
|                 title: sourceNote.title, |                 title: sourceNote.title, | ||||||
|                 relatedNotes: filteredResults.slice(0, limit).map(note => ({ |                 relatedNotes: relatedNotes, | ||||||
|                     noteId: note.noteId, |                 message: `Found ${relatedNotes.length} notes related to "${sourceNote.title}" using search and relationship analysis` | ||||||
|                     title: note.title, |  | ||||||
|                     similarity: Math.round(note.similarity * 100) / 100 |  | ||||||
|                 })), |  | ||||||
|                 message: `Found ${filteredResults.length} notes semantically related to "${sourceNote.title}"` |  | ||||||
|             }; |             }; | ||||||
|         } catch (error: any) { |         } catch (error: any) { | ||||||
|             log.error(`Error finding related notes: ${error.message || String(error)}`); |             log.error(`Error finding related notes: ${error.message || String(error)}`); | ||||||
| @@ -296,6 +354,55 @@ export class RelationshipTool implements ToolHandler { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Get notes that are directly related through attributes/relations | ||||||
|  |      */ | ||||||
|  |     private getDirectlyRelatedNotes(sourceNote: any): any[] { | ||||||
|  |         const relatedNotes: any[] = []; | ||||||
|  |  | ||||||
|  |         try { | ||||||
|  |             // Get outgoing relations | ||||||
|  |             const outgoingAttributes = sourceNote.getAttributes().filter((attr: any) => attr.type === 'relation'); | ||||||
|  |             for (const attr of outgoingAttributes) { | ||||||
|  |                 const targetNote = becca.notes[attr.value]; | ||||||
|  |                 if (targetNote && !targetNote.isDeleted) { | ||||||
|  |                     relatedNotes.push(targetNote); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // Get incoming relations | ||||||
|  |             const incomingRelations = sourceNote.getTargetRelations(); | ||||||
|  |             for (const attr of incomingRelations) { | ||||||
|  |                 if (attr.type === 'relation') { | ||||||
|  |                     const sourceOfRelation = attr.getNote(); | ||||||
|  |                     if (sourceOfRelation && !sourceOfRelation.isDeleted) { | ||||||
|  |                         relatedNotes.push(sourceOfRelation); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // Get parent and child notes | ||||||
|  |             const parentNotes = sourceNote.getParentNotes(); | ||||||
|  |             for (const parent of parentNotes) { | ||||||
|  |                 if (!parent.isDeleted) { | ||||||
|  |                     relatedNotes.push(parent); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             const childNotes = sourceNote.getChildNotes(); | ||||||
|  |             for (const child of childNotes) { | ||||||
|  |                 if (!child.isDeleted) { | ||||||
|  |                     relatedNotes.push(child); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |         } catch (error) { | ||||||
|  |             log.error(`Error getting directly related notes: ${error}`); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return relatedNotes; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Suggest possible relationships based on content analysis |      * Suggest possible relationships based on content analysis | ||||||
|      */ |      */ | ||||||
|   | |||||||
| @@ -196,38 +196,19 @@ const defaultOptions: DefaultOption[] = [ | |||||||
|     { name: "aiEnabled", value: "false", isSynced: true }, |     { name: "aiEnabled", value: "false", isSynced: true }, | ||||||
|     { name: "openaiApiKey", value: "", isSynced: false }, |     { name: "openaiApiKey", value: "", isSynced: false }, | ||||||
|     { name: "openaiDefaultModel", value: "", isSynced: true }, |     { name: "openaiDefaultModel", value: "", isSynced: true }, | ||||||
|     { name: "openaiEmbeddingModel", value: "", isSynced: true }, |  | ||||||
|     { name: "openaiBaseUrl", value: "https://api.openai.com/v1", isSynced: true }, |     { name: "openaiBaseUrl", value: "https://api.openai.com/v1", isSynced: true }, | ||||||
|     { name: "anthropicApiKey", value: "", isSynced: false }, |     { name: "anthropicApiKey", value: "", isSynced: false }, | ||||||
|     { name: "anthropicDefaultModel", value: "", isSynced: true }, |     { name: "anthropicDefaultModel", value: "", isSynced: true }, | ||||||
|     { name: "voyageEmbeddingModel", value: "", isSynced: true }, |  | ||||||
|     { name: "voyageApiKey", value: "", isSynced: false }, |     { name: "voyageApiKey", value: "", isSynced: false }, | ||||||
|     { name: "anthropicBaseUrl", value: "https://api.anthropic.com/v1", isSynced: true }, |     { name: "anthropicBaseUrl", value: "https://api.anthropic.com/v1", isSynced: true }, | ||||||
|     { name: "ollamaEnabled", value: "false", isSynced: true }, |     { name: "ollamaEnabled", value: "false", isSynced: true }, | ||||||
|     { name: "ollamaDefaultModel", value: "", isSynced: true }, |     { name: "ollamaDefaultModel", value: "", isSynced: true }, | ||||||
|     { name: "ollamaBaseUrl", value: "http://localhost:11434", isSynced: true }, |     { name: "ollamaBaseUrl", value: "http://localhost:11434", isSynced: true }, | ||||||
|     { name: "ollamaEmbeddingModel", value: "", isSynced: true }, |  | ||||||
|     { name: "embeddingAutoUpdateEnabled", value: "true", isSynced: true }, |  | ||||||
|  |  | ||||||
|     // Embedding-specific provider options |  | ||||||
|     { name: "openaiEmbeddingApiKey", value: "", isSynced: false }, |  | ||||||
|     { name: "openaiEmbeddingBaseUrl", value: "https://api.openai.com/v1", isSynced: true }, |  | ||||||
|     { name: "voyageEmbeddingBaseUrl", value: "https://api.voyageai.com/v1", isSynced: true }, |  | ||||||
|     { name: "ollamaEmbeddingBaseUrl", value: "http://localhost:11434", isSynced: true }, |  | ||||||
|  |  | ||||||
|     // Adding missing AI options |     // Adding missing AI options | ||||||
|     { name: "aiTemperature", value: "0.7", isSynced: true }, |     { name: "aiTemperature", value: "0.7", isSynced: true }, | ||||||
|     { name: "aiSystemPrompt", value: "", isSynced: true }, |     { name: "aiSystemPrompt", value: "", isSynced: true }, | ||||||
|     { name: "aiSelectedProvider", value: "openai", isSynced: true }, |     { name: "aiSelectedProvider", value: "openai", isSynced: true }, | ||||||
|     { name: "embeddingDimensionStrategy", value: "auto", isSynced: true }, |  | ||||||
|     { name: "embeddingSelectedProvider", value: "openai", isSynced: true }, |  | ||||||
|     { name: "embeddingSimilarityThreshold", value: "0.75", isSynced: true }, |  | ||||||
|     { name: "enableAutomaticIndexing", value: "true", isSynced: true }, |  | ||||||
|     { name: "maxNotesPerLlmQuery", value: "3", isSynced: true }, |  | ||||||
|     { name: "embeddingBatchSize", value: "10", isSynced: true }, |  | ||||||
|     { name: "embeddingUpdateInterval", value: "5000", isSynced: true }, |  | ||||||
|     { name: "embeddingDefaultDimension", value: "1536", isSynced: true }, |  | ||||||
|     { name: "embeddingGenerationLocation", value: "client", isSynced: true }, |  | ||||||
| ]; | ]; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|   | |||||||
| @@ -131,35 +131,17 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi | |||||||
|     aiTemperature: string; |     aiTemperature: string; | ||||||
|     openaiApiKey: string; |     openaiApiKey: string; | ||||||
|     openaiDefaultModel: string; |     openaiDefaultModel: string; | ||||||
|     openaiEmbeddingModel: string; |  | ||||||
|     openaiEmbeddingApiKey: string; |  | ||||||
|     openaiEmbeddingBaseUrl: string; |  | ||||||
|     openaiBaseUrl: string; |     openaiBaseUrl: string; | ||||||
|     anthropicApiKey: string; |     anthropicApiKey: string; | ||||||
|     anthropicDefaultModel: string; |     anthropicDefaultModel: string; | ||||||
|     voyageEmbeddingModel: string; |  | ||||||
|     voyageApiKey: string; |     voyageApiKey: string; | ||||||
|     voyageEmbeddingBaseUrl: string; |  | ||||||
|     anthropicBaseUrl: string; |     anthropicBaseUrl: string; | ||||||
|     ollamaEnabled: boolean; |     ollamaEnabled: boolean; | ||||||
|     ollamaBaseUrl: string; |     ollamaBaseUrl: string; | ||||||
|     ollamaDefaultModel: string; |     ollamaDefaultModel: string; | ||||||
|     ollamaEmbeddingModel: string; |  | ||||||
|     ollamaEmbeddingBaseUrl: string; |  | ||||||
|     codeOpenAiModel: string; |     codeOpenAiModel: string; | ||||||
|     aiSelectedProvider: string; |     aiSelectedProvider: string; | ||||||
|  |  | ||||||
|     // Embedding-related options |  | ||||||
|     embeddingAutoUpdateEnabled: boolean; |  | ||||||
|     embeddingUpdateInterval: number; |  | ||||||
|     embeddingBatchSize: number; |  | ||||||
|     embeddingDefaultDimension: number; |  | ||||||
|     embeddingSelectedProvider: string; |  | ||||||
|     enableAutomaticIndexing: boolean; |  | ||||||
|     embeddingGenerationLocation: string; |  | ||||||
|     embeddingDimensionStrategy: string; |  | ||||||
|     embeddingSimilarityThreshold: number; |  | ||||||
|     maxNotesPerLlmQuery: number; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| export type OptionNames = keyof OptionDefinitions; | export type OptionNames = keyof OptionDefinitions; | ||||||
|   | |||||||
| @@ -140,16 +140,3 @@ export interface NoteRow { | |||||||
|     content?: string | Buffer; |     content?: string | Buffer; | ||||||
| } | } | ||||||
|  |  | ||||||
| export interface NoteEmbeddingRow { |  | ||||||
|     embedId: string; |  | ||||||
|     noteId: string; |  | ||||||
|     providerId: string; |  | ||||||
|     modelId: string; |  | ||||||
|     dimension: number; |  | ||||||
|     embedding: Buffer; |  | ||||||
|     version: number; |  | ||||||
|     dateCreated: string; |  | ||||||
|     utcDateCreated: string; |  | ||||||
|     dateModified: string; |  | ||||||
|     utcDateModified: string; |  | ||||||
| } |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user