mirror of
				https://github.com/zadam/trilium.git
				synced 2025-10-31 18:36:30 +01:00 
			
		
		
		
	custom implementation of similar notes algorithm
This commit is contained in:
		
							
								
								
									
										5
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										5
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							| @@ -8158,11 +8158,6 @@ | |||||||
|       "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-0.1.2.tgz", |       "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-0.1.2.tgz", | ||||||
|       "integrity": "sha1-gIudDlb8Jz2Am6VzOOkpkZoanxo=" |       "integrity": "sha1-gIudDlb8Jz2Am6VzOOkpkZoanxo=" | ||||||
|     }, |     }, | ||||||
|     "string-similarity": { |  | ||||||
|       "version": "4.0.2", |  | ||||||
|       "resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-4.0.2.tgz", |  | ||||||
|       "integrity": "sha512-eCsPPyoQBgY4TMpVD6DVfO7pLrimUONriaO4Xjp3WPUW0YnNLqdHgRj23xotLlqrL90eJhBeq3zdAJf2mQgfBQ==" |  | ||||||
|     }, |  | ||||||
|     "string-width": { |     "string-width": { | ||||||
|       "version": "1.0.2", |       "version": "1.0.2", | ||||||
|       "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", |       "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", | ||||||
|   | |||||||
| @@ -36,6 +36,11 @@ const TPL = ` | |||||||
|         overflow: hidden;  |         overflow: hidden;  | ||||||
|         text-overflow: ellipsis; |         text-overflow: ellipsis; | ||||||
|     } |     } | ||||||
|  |      | ||||||
|  |     .note-path-list { | ||||||
|  |         max-height: 600px; | ||||||
|  |         overflow-y: auto; | ||||||
|  |     } | ||||||
|     </style> |     </style> | ||||||
|  |  | ||||||
|     <div class="current-path"></div> |     <div class="current-path"></div> | ||||||
|   | |||||||
| @@ -141,7 +141,7 @@ export default class SimilarNotesWidget extends TabAwareWidget { | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             const $item = (await linkService.createNoteLink(similarNote.notePath.join("/"))) |             const $item = (await linkService.createNoteLink(similarNote.notePath.join("/"))) | ||||||
|                 .css("font-size", 24 * similarNote.score); |                 .css("font-size", 24 * (1 - 1 / (similarNote.score - 1))); | ||||||
|  |  | ||||||
|             $list.append($item); |             $list.append($item); | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -649,7 +649,7 @@ a.external:not(.no-arrow):after, a[href^="http://"]:not(.no-arrow):after, a[href | |||||||
| } | } | ||||||
|  |  | ||||||
| .component { | .component { | ||||||
|     contain: layout size; |     contain: size; | ||||||
| } | } | ||||||
|  |  | ||||||
| .toast { | .toast { | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| "use strict"; | "use strict"; | ||||||
|  |  | ||||||
| const noteCacheService = require('../../services/note_cache/note_cache_service'); | const similarityService = require('../../services/note_cache/similarity.js'); | ||||||
| const repository = require('../../services/repository'); | const repository = require('../../services/repository'); | ||||||
|  |  | ||||||
| async function getSimilarNotes(req) { | async function getSimilarNotes(req) { | ||||||
| @@ -12,10 +12,7 @@ async function getSimilarNotes(req) { | |||||||
|         return [404, `Note ${noteId} not found.`]; |         return [404, `Note ${noteId} not found.`]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const results = await noteCacheService.findSimilarNotes(noteId); |     return await similarityService.findSimilarNotes(noteId); | ||||||
|  |  | ||||||
|     return results |  | ||||||
|         .filter(note => note.noteId !== noteId); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| module.exports = { | module.exports = { | ||||||
|   | |||||||
| @@ -2,10 +2,47 @@ const noteCache = require('./note_cache'); | |||||||
| const noteCacheService = require('./note_cache_service.js'); | const noteCacheService = require('./note_cache_service.js'); | ||||||
| const dateUtils = require('../date_utils'); | const dateUtils = require('../date_utils'); | ||||||
|  |  | ||||||
| function computeScore(candidateNote, dates) { | function gatherRewards(rewardMap, text) { | ||||||
|     let score = 0; |     if (!text) { | ||||||
|  |         return 0; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let counter = 0; | ||||||
|  |  | ||||||
|  |     for (const word of text.toLowerCase().split(/\W+/)) { | ||||||
|  |         counter += rewardMap[word] || 0; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return counter; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | function computeScore(candidateNote, ancestorNoteIds, rewardMap, dates) { | ||||||
|  |     let score = | ||||||
|  |         gatherRewards(rewardMap, candidateNote.title) | ||||||
|  |         + gatherRewards(rewardMap, candidateNote.type); | ||||||
|  |         + gatherRewards(rewardMap, trimMime(candidateNote.mime)); | ||||||
|  |  | ||||||
|  |     for (const ancestorNote of candidateNote.ancestors) { | ||||||
|  |         if (!ancestorNoteIds.includes(ancestorNote.noteId)) { | ||||||
|  |             score += gatherRewards(rewardMap, ancestorNote.title); | ||||||
|  |  | ||||||
|  |             for (const branch of ancestorNote.parentBranches) { | ||||||
|  |                 score += gatherRewards(rewardMap, branch.prefix); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for (const branch of candidateNote.parentBranches) { | ||||||
|  |         score += gatherRewards(rewardMap, branch.prefix); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for (const attr of candidateNote.attributes) { | ||||||
|  |         if (!IGNORED_ATTR_NAMES.includes(attr.name)) { | ||||||
|  |             score += gatherRewards(rewardMap, attr.name); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         score += gatherRewards(rewardMap, attr.value); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * We want to improve standing of notes which have been created in similar time to each other since |      * We want to improve standing of notes which have been created in similar time to each other since | ||||||
| @@ -19,16 +56,16 @@ function computeScore(candidateNote, dates) { | |||||||
|     if (utcDateCreated >= dates.minDate && utcDateCreated <= dates.maxDate |     if (utcDateCreated >= dates.minDate && utcDateCreated <= dates.maxDate | ||||||
|         && utcDateCreated < dates.minExcludedDate && utcDateCreated > dates.maxExcludedDate) { |         && utcDateCreated < dates.minExcludedDate && utcDateCreated > dates.maxExcludedDate) { | ||||||
|  |  | ||||||
|         score += 0.3; |         score += 3; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return score; |     return score; | ||||||
| } | } | ||||||
|  |  | ||||||
| function evaluateSimilarity(sourceNote, candidateNote, rewardMap, dates, results) { | function evaluateSimilarity(sourceNote, candidateNote, ancestorNoteIds, rewardMap, dates, results) { | ||||||
|     let score = computeScore(candidateNote, rewardMap, dates); |     let score = computeScore(candidateNote, ancestorNoteIds, rewardMap, dates); | ||||||
|  |  | ||||||
|     if (score > 0.5) { |     if (score >= 4) { | ||||||
|         const notePath = noteCacheService.getSomePath(candidateNote); |         const notePath = noteCacheService.getSomePath(candidateNote); | ||||||
|  |  | ||||||
|         // this takes care of note hoisting |         // this takes care of note hoisting | ||||||
| @@ -37,7 +74,7 @@ function evaluateSimilarity(sourceNote, candidateNote, rewardMap, dates, results | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (noteCacheService.isNotePathArchived(notePath)) { |         if (noteCacheService.isNotePathArchived(notePath)) { | ||||||
|             score -= 0.2; // archived penalization |             score -= 1; // archived penalization | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         results.push({score, notePath, noteId: candidateNote.noteId}); |         results.push({score, notePath, noteId: candidateNote.noteId}); | ||||||
| @@ -68,36 +105,36 @@ function buildRewardMap(note) { | |||||||
|     const map = {}; |     const map = {}; | ||||||
|  |  | ||||||
|     for (const ancestorNote of note.ancestors) { |     for (const ancestorNote of note.ancestors) { | ||||||
|         updateMap(map, ancestorNote.title, 0.4); |         addToRewardMap(map, ancestorNote.title, 0.4); | ||||||
|  |  | ||||||
|         for (const branch of ancestorNote.parentBranches) { |         for (const branch of ancestorNote.parentBranches) { | ||||||
|             updateMap(map, branch.prefix, 0.4); |             addToRewardMap(map, branch.prefix, 0.4); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     updateMap(map, note.type, 0.2); |     addToRewardMap(map, note.type, 0.2); | ||||||
|     updateMap(map, processMime(note.mime), 0.3); |     addToRewardMap(map, trimMime(note.mime), 0.3); | ||||||
|  |  | ||||||
|     updateMap(map, note.title, 1); |     addToRewardMap(map, note.title, 1); | ||||||
|  |  | ||||||
|     for (const branch of note.parentBranches) { |     for (const branch of note.parentBranches) { | ||||||
|         updateMap(map, branch.prefix, 1); |         addToRewardMap(map, branch.prefix, 1); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     for (const attr of note.attributes) { |     for (const attr of note.attributes) { | ||||||
|         const reward = note.noteId === attr.noteId ? 0.8 : 0.5; |         const reward = note.noteId === attr.noteId ? 0.8 : 0.5; | ||||||
|  |  | ||||||
|         if (!IGNORED_ATTR_NAMES.includes(attr.name)) { |         if (!IGNORED_ATTR_NAMES.includes(attr.name)) { | ||||||
|             updateMap(map, attr.name, reward); |             addToRewardMap(map, attr.name, reward); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         updateMap(map, attr.value, reward); |         addToRewardMap(map, attr.value, reward); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return map; |     return map; | ||||||
| } | } | ||||||
|  |  | ||||||
| function processMime(mime) { | function trimMime(mime) { | ||||||
|     if (!mime) { |     if (!mime) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| @@ -118,21 +155,19 @@ function processMime(mime) { | |||||||
|     return str; |     return str; | ||||||
| } | } | ||||||
|  |  | ||||||
| function updateMap(map, text, baseReward) { | function addToRewardMap(map, text, baseReward) { | ||||||
|     if (!text) { |     if (!text) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     for (const word of text.split(/\W+/)) { |     for (const word of text.toLowerCase().split(/\W+/)) { | ||||||
|  |         if (word) { | ||||||
|             map[word] = map[word] || 0; |             map[word] = map[word] || 0; | ||||||
|  |  | ||||||
|             // reward grows with the length of matched string |             // reward grows with the length of matched string | ||||||
|             map[word] += baseReward * Math.sqrt(word.length); |             map[word] += baseReward * Math.sqrt(word.length); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| function tokenize(str) { |  | ||||||
|     return ; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| async function findSimilarNotes(noteId) { | async function findSimilarNotes(noteId) { | ||||||
| @@ -155,24 +190,25 @@ async function findSimilarNotes(noteId) { | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     const rewardMap = buildRewardMap(baseNote); |     const rewardMap = buildRewardMap(baseNote); | ||||||
|  |     const ancestorNoteIds = baseNote.ancestors.map(note => note.noteId); | ||||||
|  |  | ||||||
|     for (const candidateNote of Object.values(noteCache.notes)) { |     for (const candidateNote of Object.values(noteCache.notes)) { | ||||||
|         if (candidateNote.noteId === baseNote.noteId) { |         if (candidateNote.noteId === baseNote.noteId) { | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         evaluateSimilarity(baseNote, candidateNote, rewardMap, dates, results); |         evaluateSimilarity(baseNote, candidateNote, ancestorNoteIds, rewardMap, dates, results); | ||||||
|  |  | ||||||
|         i++; |         i++; | ||||||
|  |  | ||||||
|         if (i % 200 === 0) { |         if (i % 1000 === 0) { | ||||||
|             await setImmediatePromise(); |             await setImmediatePromise(); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     results.sort((a, b) => a.score > b.score ? -1 : 1); |     results.sort((a, b) => a.score > b.score ? -1 : 1); | ||||||
|  |  | ||||||
|     return results.length > 50 ? results.slice(0, 200) : results; |     return results.length > 200 ? results.slice(0, 200) : results; | ||||||
| } | } | ||||||
|  |  | ||||||
| module.exports = { | module.exports = { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user