mirror of
				https://github.com/zadam/trilium.git
				synced 2025-10-31 02:16:05 +01:00 
			
		
		
		
	this is absolutely cursed, but query decomp works now
This commit is contained in:
		| @@ -99,29 +99,56 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]` | ||||
|                     .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes | ||||
|                     .trim(); | ||||
|  | ||||
|                 // Check if the text might contain a JSON array (has square brackets) | ||||
|                 if (jsonStr.includes('[') && jsonStr.includes(']')) { | ||||
|                     // Extract just the array part if there's explanatory text | ||||
|                     const arrayMatch = jsonStr.match(/\[[\s\S]*\]/); | ||||
|                     if (arrayMatch) { | ||||
|                         jsonStr = arrayMatch[0]; | ||||
|                 log.info(`Cleaned JSON string: ${jsonStr}`); | ||||
|  | ||||
|                 // Check if the text might contain a JSON structure (has curly braces or square brackets) | ||||
|                 if ((jsonStr.includes('{') && jsonStr.includes('}')) || (jsonStr.includes('[') && jsonStr.includes(']'))) { | ||||
|                     // Try to extract the JSON structure | ||||
|                     let jsonMatch = jsonStr.match(/(\{[\s\S]*\}|\[[\s\S]*\])/); | ||||
|                     if (jsonMatch) { | ||||
|                         jsonStr = jsonMatch[0]; | ||||
|                         log.info(`Extracted JSON structure: ${jsonStr}`); | ||||
|                     } | ||||
|  | ||||
|                     // Try to parse the JSON | ||||
|                     try { | ||||
|                         const queries = JSON.parse(jsonStr); | ||||
|                         if (Array.isArray(queries) && queries.length > 0) { | ||||
|                             const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean); | ||||
|                         const parsed = JSON.parse(jsonStr); | ||||
|  | ||||
|                         // Handle array format: ["query1", "query2"] | ||||
|                         if (Array.isArray(parsed)) { | ||||
|                             const result = parsed | ||||
|                                 .map(q => typeof q === 'string' ? q.trim() : String(q).trim()) | ||||
|                                 .filter(Boolean); | ||||
|                             cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); | ||||
|                             return result; | ||||
|                         } | ||||
|                     } catch (innerError) { | ||||
|                         // If parsing fails, log it and continue to the fallback | ||||
|                         log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`); | ||||
|                         // Handle object format: {"query1": "reason1", "query2": "reason2"} or {"query1" : "query2"} | ||||
|                         else if (typeof parsed === 'object' && parsed !== null) { | ||||
|                             // Extract both keys and values as potential queries | ||||
|                             const keys = Object.keys(parsed); | ||||
|                             const values = Object.values(parsed); | ||||
|  | ||||
|                             // Add keys as queries | ||||
|                             const keysResult = keys | ||||
|                                 .filter(key => key && key.length > 3) | ||||
|                                 .map(key => key.trim()); | ||||
|  | ||||
|                             // Add values as queries if they're strings and not already included | ||||
|                             const valuesResult = values | ||||
|                                 .filter((val): val is string => typeof val === 'string' && val.length > 3) | ||||
|                                 .map(val => val.trim()) | ||||
|                                 .filter(val => !keysResult.includes(val)); | ||||
|  | ||||
|                             const result = [...keysResult, ...valuesResult]; | ||||
|                             cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); | ||||
|                             return result; | ||||
|                         } | ||||
|                     } catch (parseError) { | ||||
|                         log.info(`JSON parse error: ${parseError}. Will use fallback parsing.`); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // Fallback 1: Try to extract an array manually by splitting on commas between quotes | ||||
|                 // Fallback: Try to extract an array manually by splitting on commas between quotes | ||||
|                 if (jsonStr.includes('[') && jsonStr.includes(']')) { | ||||
|                     const arrayContent = jsonStr.substring( | ||||
|                         jsonStr.indexOf('[') + 1, | ||||
| @@ -132,32 +159,43 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]` | ||||
|                     const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g); | ||||
|                     if (stringMatches && stringMatches.length > 0) { | ||||
|                         const result = stringMatches | ||||
|                             .map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes | ||||
|                             .map((m: string) => m.substring(1, m.length - 1).trim()) // Remove surrounding quotes | ||||
|                             .filter((s: string) => s.length > 0); | ||||
|                         cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); | ||||
|                         return result; | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // Fallback 2: Extract queries line by line | ||||
|                 const lines = responseText.split('\n') | ||||
|                     .map((line: string) => line.trim()) | ||||
|                     .filter((line: string) => | ||||
|                         line.length > 0 && | ||||
|                         !line.startsWith('```') && | ||||
|                         !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone | ||||
|                         !line.match(/^\[|\]$/) // Skip lines that are just brackets | ||||
|                 // Fallback: Try to extract key-value pairs from object notation manually | ||||
|                 if (jsonStr.includes('{') && jsonStr.includes('}')) { | ||||
|                     // Extract content between curly braces | ||||
|                     const objectContent = jsonStr.substring( | ||||
|                         jsonStr.indexOf('{') + 1, | ||||
|                         jsonStr.lastIndexOf('}') | ||||
|                     ); | ||||
|  | ||||
|                 if (lines.length > 0) { | ||||
|                     // Remove numbering, quotes and other list markers from each line | ||||
|                     const result = lines.map((line: string) => { | ||||
|                         return line | ||||
|                             .replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc) | ||||
|                             .replace(/^[-*•]\s*/, '')  // Remove bullet list markers | ||||
|                             .replace(/^["']|["']$/g, '') // Remove surrounding quotes | ||||
|                             .trim(); | ||||
|                     }).filter((s: string) => s.length > 0); | ||||
|                     // Split by commas that aren't inside quotes | ||||
|                     const pairs: string[] = objectContent.split(/,(?=(?:[^"]*"[^"]*")*[^"]*$)/); | ||||
|  | ||||
|                     const result = pairs | ||||
|                         .map(pair => { | ||||
|                             // Split by colon that isn't inside quotes | ||||
|                             const keyValue = pair.split(/:(?=(?:[^"]*"[^"]*")*[^"]*$)/); | ||||
|                             if (keyValue.length === 2) { | ||||
|                                 const key = keyValue[0].replace(/"/g, '').trim(); | ||||
|                                 const value = keyValue[1].replace(/"/g, '').trim(); | ||||
|  | ||||
|                                 if (key && key.length > 3) { | ||||
|                                     return key; | ||||
|                                 } | ||||
|  | ||||
|                                 if (value && value.length > 3) { | ||||
|                                     return value; | ||||
|                                 } | ||||
|                             } | ||||
|                             return null; | ||||
|                         }) | ||||
|                         .filter((s: string | null) => s !== null); | ||||
|  | ||||
|                     cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); | ||||
|                     return result; | ||||
| @@ -269,40 +307,24 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]` | ||||
|         context?: string | ||||
|     ): Promise<SubQuery[]> { | ||||
|         try { | ||||
|             // Create a simple prompt for query decomposition | ||||
|             const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search. | ||||
|  | ||||
| Your goal is to help find comprehensive information by breaking down the query into multiple search terms. | ||||
|  | ||||
| IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects. | ||||
|  | ||||
| For example, if the query is "What are Docker containers?", good sub-queries would be: | ||||
| 1. "Docker container architecture and components" | ||||
| 2. "Docker vs virtual machines differences" | ||||
| 3. "Docker container use cases and benefits" | ||||
| 4. "Docker container deployment best practices" | ||||
|  | ||||
| Format your response as a JSON array of objects with 'text' and 'reason' properties. | ||||
| Example: [ | ||||
|   {"text": "Docker container architecture", "reason": "Understanding the technical structure"}, | ||||
|   {"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"}, | ||||
|   {"text": "Docker container benefits", "reason": "Understanding advantages and use cases"}, | ||||
|   {"text": "Docker deployment best practices", "reason": "Learning practical implementation"} | ||||
| ] | ||||
|  | ||||
| ${context ? `\nContext: ${context}` : ''} | ||||
|  | ||||
| Query: ${query}`; | ||||
|             // Use the proven prompt format that was working before | ||||
|             const prompt = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question. | ||||
| Given the user's question, generate 3-5 specific search queries that would help find relevant information. | ||||
| Each query should be focused on a different aspect of the question. | ||||
| Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question. | ||||
| Format your answer as a JSON array of strings, with each string being a search query. | ||||
| Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; | ||||
|  | ||||
|             log.info(`Sending decomposition prompt to LLM for query: "${query}"`); | ||||
|  | ||||
|             const messages = [ | ||||
|                 { role: "system" as const, content: prompt } | ||||
|                 { role: "system" as const, content: prompt }, | ||||
|                 { role: "user" as const, content: query } | ||||
|             ]; | ||||
|  | ||||
|             const options = { | ||||
|                 temperature: 0.7, | ||||
|                 maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS, | ||||
|                 temperature: 0.3, | ||||
|                 maxTokens: 300, | ||||
|                 bypassFormatter: true, | ||||
|                 expectsJsonResponse: true, | ||||
|                 _bypassContextProcessing: true, | ||||
| @@ -315,78 +337,144 @@ Query: ${query}`; | ||||
|  | ||||
|             log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`); | ||||
|  | ||||
|             // Try to parse the response as JSON | ||||
|             let subQueries: SubQuery[] = []; | ||||
|             // Parse the response to extract the queries | ||||
|             let searchQueries: string[] = []; | ||||
|             try { | ||||
|                 // Extract the JSON from the response | ||||
|                 const extractedJson = JsonExtractor.extract(responseText, { | ||||
|                     extractArrays: true, | ||||
|                     applyFixes: true, | ||||
|                     useFallbacks: true | ||||
|                 // Remove code blocks, quotes, and clean up the response text | ||||
|                 let jsonStr = responseText | ||||
|                     .replace(/```(?:json)?|```/g, '') // Remove code block markers | ||||
|                     .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes | ||||
|                     .trim(); | ||||
|  | ||||
|                 log.info(`Cleaned JSON string: ${jsonStr}`); | ||||
|  | ||||
|                 // Check if the text might contain a JSON structure (has curly braces or square brackets) | ||||
|                 if ((jsonStr.includes('{') && jsonStr.includes('}')) || (jsonStr.includes('[') && jsonStr.includes(']'))) { | ||||
|                     // Try to extract the JSON structure | ||||
|                     let jsonMatch = jsonStr.match(/(\{[\s\S]*\}|\[[\s\S]*\])/); | ||||
|                     if (jsonMatch) { | ||||
|                         jsonStr = jsonMatch[0]; | ||||
|                         log.info(`Extracted JSON structure: ${jsonStr}`); | ||||
|                     } | ||||
|  | ||||
|                     // Try to parse the JSON | ||||
|                     try { | ||||
|                         const parsed = JSON.parse(jsonStr); | ||||
|  | ||||
|                         // Handle array format: ["query1", "query2"] | ||||
|                         if (Array.isArray(parsed)) { | ||||
|                             searchQueries = parsed | ||||
|                                 .map(q => typeof q === 'string' ? q.trim() : String(q).trim()) | ||||
|                                 .filter(Boolean); | ||||
|                             log.info(`Extracted ${searchQueries.length} queries from JSON array`); | ||||
|                         } | ||||
|                         // Handle object format: {"query1": "reason1", "query2": "reason2"} or {"query1" : "query2"} | ||||
|                         else if (typeof parsed === 'object' && parsed !== null) { | ||||
|                             // Extract both keys and values as potential queries | ||||
|                             const keys = Object.keys(parsed); | ||||
|                             const values = Object.values(parsed); | ||||
|  | ||||
|                             // Add keys as queries | ||||
|                             searchQueries = keys | ||||
|                                 .filter(key => key && key.length > 3) | ||||
|                                 .map(key => key.trim()); | ||||
|  | ||||
|                             // Add values as queries if they're strings and not already included | ||||
|                             values | ||||
|                                 .filter((val): val is string => typeof val === 'string' && val.length > 3) | ||||
|                                 .map(val => val.trim()) | ||||
|                                 .forEach((val: string) => { | ||||
|                                     if (!searchQueries.includes(val)) { | ||||
|                                         searchQueries.push(val); | ||||
|                                     } | ||||
|                                 }); | ||||
|  | ||||
|                 log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`); | ||||
|                             log.info(`Extracted ${searchQueries.length} queries from JSON object`); | ||||
|                         } | ||||
|                     } catch (parseError) { | ||||
|                         log.info(`JSON parse error: ${parseError}. Will use fallback parsing.`); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 if (Array.isArray(extractedJson) && extractedJson.length > 0) { | ||||
|                     // Convert the extracted data to SubQuery objects | ||||
|                     subQueries = extractedJson | ||||
|                         .filter(item => item && typeof item === 'object' && item.text) | ||||
|                         .map(item => ({ | ||||
|                 // Fallback: Try to extract an array manually by splitting on commas between quotes | ||||
|                 if (searchQueries.length === 0 && jsonStr.includes('[') && jsonStr.includes(']')) { | ||||
|                     const arrayContent = jsonStr.substring( | ||||
|                         jsonStr.indexOf('[') + 1, | ||||
|                         jsonStr.lastIndexOf(']') | ||||
|                     ); | ||||
|  | ||||
|                     // Use regex to match quoted strings, handling escaped quotes | ||||
|                     const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g); | ||||
|                     if (stringMatches && stringMatches.length > 0) { | ||||
|                         searchQueries = stringMatches | ||||
|                             .map((m: string) => m.substring(1, m.length - 1).trim()) // Remove surrounding quotes | ||||
|                             .filter((s: string) => s.length > 0); | ||||
|                         log.info(`Extracted ${searchQueries.length} queries using regex`); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // Fallback: Try to extract key-value pairs from object notation manually | ||||
|                 if (searchQueries.length === 0 && jsonStr.includes('{') && jsonStr.includes('}')) { | ||||
|                     // Extract content between curly braces | ||||
|                     const objectContent = jsonStr.substring( | ||||
|                         jsonStr.indexOf('{') + 1, | ||||
|                         jsonStr.lastIndexOf('}') | ||||
|                     ); | ||||
|  | ||||
|                     // Split by commas that aren't inside quotes | ||||
|                     const pairs: string[] = objectContent.split(/,(?=(?:[^"]*"[^"]*")*[^"]*$)/); | ||||
|  | ||||
|                     for (const pair of pairs) { | ||||
|                         // Split by colon that isn't inside quotes | ||||
|                         const keyValue = pair.split(/:(?=(?:[^"]*"[^"]*")*[^"]*$)/); | ||||
|                         if (keyValue.length === 2) { | ||||
|                             const key = keyValue[0].replace(/"/g, '').trim(); | ||||
|                             const value = keyValue[1].replace(/"/g, '').trim(); | ||||
|  | ||||
|                             if (key && key.length > 3 && !searchQueries.includes(key)) { | ||||
|                                 searchQueries.push(key); | ||||
|                             } | ||||
|  | ||||
|                             if (value && value.length > 3 && !searchQueries.includes(value)) { | ||||
|                                 searchQueries.push(value); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|  | ||||
|                     log.info(`Extracted ${searchQueries.length} queries from manual object parsing`); | ||||
|                 } | ||||
|  | ||||
|                 // Convert search queries to SubQuery objects | ||||
|                 if (searchQueries.length > 0) { | ||||
|                     const subQueries = searchQueries.map((text, index) => ({ | ||||
|                         id: this.generateSubQueryId(), | ||||
|                             text: item.text, | ||||
|                             reason: item.reason || "Sub-aspect of the main question", | ||||
|                         text, | ||||
|                         reason: `Search query ${index + 1}`, | ||||
|                         isAnswered: false | ||||
|                     })); | ||||
|  | ||||
|                     log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`); | ||||
|                 } else { | ||||
|                     log.info(`Failed to extract array of sub-queries from LLM response`); | ||||
|                 } | ||||
|             } catch (error) { | ||||
|                 log.error(`Error parsing LLM response: ${error}`); | ||||
|             } | ||||
|  | ||||
|             // Always include the original query | ||||
|             const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase()); | ||||
|                     // Always include the original query if not already included | ||||
|                     const hasOriginal = subQueries.some(sq => sq.text.toLowerCase().includes(query.toLowerCase()) || query.toLowerCase().includes(sq.text.toLowerCase())); | ||||
|                     if (!hasOriginal) { | ||||
|                 subQueries.push({ | ||||
|                         subQueries.unshift({ | ||||
|                             id: this.generateSubQueryId(), | ||||
|                     text: query, | ||||
|                             text: query.trim(), | ||||
|                             reason: "Original query", | ||||
|                             isAnswered: false | ||||
|                         }); | ||||
|                         log.info(`Added original query to sub-queries list`); | ||||
|                     } | ||||
|  | ||||
|             // Ensure we have at least 3 queries for better search coverage | ||||
|             if (subQueries.length < 3) { | ||||
|                 // Create some generic variants of the original query | ||||
|                 const genericVariants = [ | ||||
|                     { text: `${query} examples and use cases`, reason: "Practical applications" }, | ||||
|                     { text: `${query} concepts and definitions`, reason: "Conceptual understanding" }, | ||||
|                     { text: `${query} best practices`, reason: "Implementation guidance" } | ||||
|                 ]; | ||||
|  | ||||
|                 // Add variants until we have at least 3 queries | ||||
|                 for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) { | ||||
|                     subQueries.push({ | ||||
|                         id: this.generateSubQueryId(), | ||||
|                         text: genericVariants[i].text, | ||||
|                         reason: genericVariants[i].reason, | ||||
|                         isAnswered: false | ||||
|                     }); | ||||
|                 } | ||||
|  | ||||
|                 log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`); | ||||
|             } | ||||
|  | ||||
|                     log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`); | ||||
|                     return subQueries; | ||||
|         } catch (error) { | ||||
|             log.error(`Error in simpleQueryDecomposition: ${error}`); | ||||
|                 } | ||||
|             } catch (parseError) { | ||||
|                 log.error(`Error parsing search queries: ${parseError}`); | ||||
|             } | ||||
|  | ||||
|             // Return the original query plus some variants as fallback | ||||
|             const fallbackQueries = [ | ||||
|             // Fallback if all extraction methods fail | ||||
|             log.info(`Using fallback queries`); | ||||
|             return [ | ||||
|                 { | ||||
|                     id: this.generateSubQueryId(), | ||||
|                     text: query, | ||||
| @@ -395,20 +483,27 @@ Query: ${query}`; | ||||
|                 }, | ||||
|                 { | ||||
|                     id: this.generateSubQueryId(), | ||||
|                     text: `${query} overview`, | ||||
|                     text: `${query.trim()} overview`, | ||||
|                     reason: "General information", | ||||
|                     isAnswered: false | ||||
|                 }, | ||||
|                 { | ||||
|                     id: this.generateSubQueryId(), | ||||
|                     text: `${query} examples`, | ||||
|                     text: `${query.trim()} examples`, | ||||
|                     reason: "Practical examples", | ||||
|                     isAnswered: false | ||||
|                 } | ||||
|             ]; | ||||
|         } catch (error) { | ||||
|             log.error(`Error in simpleQueryDecomposition: ${error}`); | ||||
|  | ||||
|             log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`); | ||||
|             return fallbackQueries; | ||||
|             // Return the original query as fallback | ||||
|             return [{ | ||||
|                 id: this.generateSubQueryId(), | ||||
|                 text: query, | ||||
|                 reason: "Error occurred, using original query", | ||||
|                 isAnswered: false | ||||
|             }]; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user