fix: use newline_boundaries param for tokenizer during title and summary generation, attempt to serve HTML in summary generation

This commit is contained in:
Julian Lam
2025-09-04 16:55:04 -04:00
parent fcd9f1a999
commit 2ea624fc8e
2 changed files with 12 additions and 4 deletions

View File

@@ -757,17 +757,25 @@ Mocks.notes.public = async (post) => {
attachment: normalizeAttachment(noteAttachment),
};
const sentences = tokenizer.sentences(post.content, { sanitize: true });
const sentences = tokenizer.sentences(post.content, { newline_boundaries: true });
// Append sentences to summary until it contains just under 500 characters of content
const limit = 500;
let remaining = limit;
summary = sentences.reduce((memo, sentence) => {
const remaining = limit - memo.length;
if (sentence.length < remaining) {
const clean = sanitize(sentence, {
allowedTags: [],
allowedAttributes: {},
});
remaining = remaining - clean.length;
if (remaining > 0) {
memo += ` ${sentence}`;
}
return memo;
}, '');
// Final sanitization to clean up tags
summary = posts.sanitize(summary);
}
let context = await posts.getPostField(post.pid, 'context');

View File

@@ -156,7 +156,7 @@ Notes.assert = async (uid, input, options = { skipChecks: false }) => {
// mainPid ok to leave as-is
if (!title) {
const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true });
const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true, newline_boundaries: true });
title = sentences.shift();
}