fix: #12866, improve title generator, refactor tests

This commit is contained in:
Julian Lam
2024-10-25 13:18:45 -04:00
parent eccad588f4
commit beb85604bd
2 changed files with 40 additions and 32 deletions

View File

@@ -306,7 +306,7 @@ Helpers.resolveObjects = async (ids) => {
};
// Tag names whose inner content is considered a candidate for title text.
const titleishTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title', 'p', 'span'];
// NOTE(review): `titleRegex` is declared twice below. This looks like a rendered diff showing the
// removed line followed by its replacement — confirm that only the second declaration is live.
//
// First form: an attribute-less opening tag from the list, lazily captured content (group 1), then a
// closing tag with ANY of the listed names, not necessarily the one that opened the element.
const titleRegex = new RegExp(`<(?:${titleishTags.join('|')})>(.+?)</(?:${titleishTags.join('|')})>`);
// Second form: captures the tag name (group 1) and the content (group 2), and uses the `\1`
// backreference so the closing tag must repeat the opening tag's name. Opening tags with attributes
// are still not matched. The `m` flag only changes `^`/`$`, which this pattern does not use; `.` still
// does not match line terminators, so the element's content must sit on a single line to match.
const titleRegex = new RegExp(`<(${titleishTags.join('|')})>(.+?)</\\1>`, 'm');
Helpers.generateTitle = (html) => {
// Given an html string, generates a more appropriate title if possible
let title;
@@ -314,12 +314,15 @@ Helpers.generateTitle = (html) => {
// Try the first paragraph-like element
const match = html.match(titleRegex);
if (match) {
title = match[1];
title = match[2];
}
// Fall back to newline splitting (i.e. if no paragraph elements)
title = title || html.split('\n').filter(Boolean).shift();
// Discard everything after a line break element
title = title.replace(/<br(\s\/)?>.*/g, '');
// Strip html
title = utils.stripHTMLTags(title);
@@ -338,6 +341,9 @@ Helpers.generateTitle = (html) => {
title = sentences.shift();
}
// Trim certain punctuation marks
title = title.trim().replace(/[:]$/, '');
// Truncate down if too long
if (title.length > meta.config.maximumTitleLength) {
title = `${title.slice(0, meta.config.maximumTitleLength - 3)}...`;