mirror of
https://github.com/NodeBB/NodeBB.git
synced 2025-10-26 08:36:12 +01:00
fix: add pre-processing step to title generation logic so sbd doesn't fall over so badly
This commit is contained in:
@@ -339,52 +339,6 @@ Helpers.resolveObjects = async (ids) => {
|
||||
return objects.length === 1 ? objects[0] : objects;
|
||||
};
|
||||
|
||||
// Tags whose text content is considered a reasonable title candidate.
const titleishTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title', 'p', 'span'];
// Matches `<tag>…</tag>` for any of the tags above: capture group 1 is the tag
// name, capture group 2 is the (non-greedy, single-line) inner content.
// Only attribute-less opening tags match. No flags are needed: the pattern has
// no `^`/`$` anchors, so a multiline flag would have no effect.
const titleRegex = new RegExp(`<(${titleishTags.join('|')})>(.+?)</\\1>`);
|
||||
/**
 * Given an html string, generates a more appropriate title if possible.
 *
 * Steps, in order:
 *  1. use the content of the first title-ish element (see `titleRegex`) when
 *     it sits at the very start of the input, otherwise the first non-empty line;
 *  2. drop everything from the first `<br>` element onwards;
 *  3. strip the remaining HTML via `utils.stripHTMLTags`;
 *  4. keep only the first sentence (terminated by `.`, `?` or `!` + whitespace);
 *  5. truncate to `meta.config.maximumTitleLength`, ending with `...`.
 *
 * @param {string} html - HTML content to derive a title from
 * @returns {string} the generated title; an empty string when `html` is not a
 *   string or contains no non-empty line
 */
Helpers.generateTitle = (html) => {
	// Nothing sensible can be derived from non-string input
	if (typeof html !== 'string') {
		return '';
	}

	// Try the first paragraph-like element, but only when it opens the content
	const match = html.match(titleRegex);
	let title = match && match.index === 0 ? match[2] : '';

	// Fall back to newline splitting (i.e. if no paragraph elements).
	// Empty or newline-only input yields '' instead of throwing further down.
	title = title || html.split('\n').find(Boolean) || '';

	// Discard everything after a line break element
	// (covers <br>, <br/>, <br />, upper-case and attribute-bearing variants)
	title = title.replace(/<br\b[^>]*>[\s\S]*/i, '');

	// Strip html
	title = utils.stripHTMLTags(title);

	// Use only the first sentence: cut right after the first terminator that is
	// followed by whitespace; leave the title untouched if there is none
	const boundary = title.search(/[.?!]\s/);
	if (boundary !== -1) {
		title = title.slice(0, boundary + 1);
	}

	// Truncate down if too long
	const maxLength = meta.config.maximumTitleLength;
	if (title.length > maxLength) {
		let cut = Math.max(0, maxLength - 3);
		// Do not split a surrogate pair: if the last kept code unit is a high
		// surrogate, drop it so the title never ends in half a character
		const lastUnit = title.charCodeAt(cut - 1);
		if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) {
			cut -= 1;
		}
		title = `${title.slice(0, cut)}...`;
	}

	return title;
};
|
||||
|
||||
Helpers.remoteAnchorToLocalProfile = async (content, isMarkdown = false) => {
|
||||
let anchorRegex;
|
||||
if (isMarkdown) {
|
||||
|
||||
@@ -165,7 +165,11 @@ Notes.assert = async (uid, input, options = { skipChecks: false }) => {
|
||||
|
||||
// mainPid ok to leave as-is
|
||||
if (!title) {
|
||||
const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true });
|
||||
// Naive pre-processing prior to sbd tokenization
|
||||
let sbdInput = content || sourceContent;
|
||||
sbdInput = sbdInput.replace('</p><p>', '</p>\n<p>');
|
||||
|
||||
const sentences = tokenizer.sentences(sbdInput, { sanitize: true, newline_boundaries: true });
|
||||
title = sentences.shift();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user