fix: use newline_boundaries param for tokenizer during title and summary generation, attempt to serve HTML in summary generation

This commit is contained in:
Julian Lam
2025-09-04 16:55:04 -04:00
parent fcd9f1a999
commit 2ea624fc8e
2 changed files with 12 additions and 4 deletions

View File

@@ -757,17 +757,25 @@ Mocks.notes.public = async (post) => {
attachment: normalizeAttachment(noteAttachment), attachment: normalizeAttachment(noteAttachment),
}; };
const sentences = tokenizer.sentences(post.content, { sanitize: true }); const sentences = tokenizer.sentences(post.content, { newline_boundaries: true });
// Append sentences to summary until it contains just under 500 characters of content // Append sentences to summary until it contains just under 500 characters of content
const limit = 500; const limit = 500;
let remaining = limit;
summary = sentences.reduce((memo, sentence) => { summary = sentences.reduce((memo, sentence) => {
const remaining = limit - memo.length; const clean = sanitize(sentence, {
if (sentence.length < remaining) { allowedTags: [],
allowedAttributes: {},
});
remaining = remaining - clean.length;
if (remaining > 0) {
memo += ` ${sentence}`; memo += ` ${sentence}`;
} }
return memo; return memo;
}, ''); }, '');
// Final sanitization to clean up tags
summary = posts.sanitize(summary);
} }
let context = await posts.getPostField(post.pid, 'context'); let context = await posts.getPostField(post.pid, 'context');

View File

@@ -156,7 +156,7 @@ Notes.assert = async (uid, input, options = { skipChecks: false }) => {
// mainPid ok to leave as-is // mainPid ok to leave as-is
if (!title) { if (!title) {
const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true }); const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true, newline_boundaries: true });
title = sentences.shift(); title = sentences.shift();
} }