fix: #12857 byebye cheerio

This commit is contained in:
Julian Lam
2024-10-18 14:41:22 -04:00
parent cac4f8e7d6
commit 8492b5f402
3 changed files with 19 additions and 6 deletions

View File

@@ -51,7 +51,6 @@
"bootswatch": "5.3.3",
"chalk": "4.1.2",
"chart.js": "4.4.5",
"cheerio": "^1.0.0-rc.12",
"cli-graph": "3.2.2",
"clipboard": "2.0.11",
"colors": "1.4.0",

View File

@@ -5,7 +5,7 @@ const process = require('process');
const nconf = require('nconf');
const winston = require('winston');
const validator = require('validator');
const cheerio = require('cheerio');
// const cheerio = require('cheerio');
const crypto = require('crypto');
const meta = require('../meta');
@@ -292,13 +292,17 @@ Helpers.resolveObjects = async (ids) => {
return objects.length === 1 ? objects[0] : objects;
};
// Tag names whose first occurrence is treated as a candidate title.
const titleishTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title', 'p', 'span'];
// Alternation of the tag names above, shared by the opening- and closing-tag parts of the pattern.
const titleishTagPattern = titleishTags.join('|');
// Matches the first title-ish element and captures its inner content in group 1
// (the tag-name groups are deliberately non-capturing so group 1 stays the content).
// - The opening tag may carry attributes (e.g. `<p dir="auto">`); the required `\s` or `>`
//   right after the tag name keeps `<pre>` from being mistaken for `<p>`.
// - The closing tag may contain trailing whitespace (`</p >`).
// - Tag names are matched case-insensitively (`<H1>` is equivalent to `<h1>`).
// - The closing tag is not required to have the same name as the opening tag.
// - No `s` flag is set, so the captured content never spans a line break.
const titleRegex = new RegExp(`<(?:${titleishTagPattern})(?:\\s[^>]*)?>(.+?)</(?:${titleishTagPattern})\\s*>`, 'i');
Helpers.generateTitle = (html) => {
// Given an html string, generates a more appropriate title if possible
const $ = cheerio.load(html);
let title;
// Try the first paragraph element
title = $('h1, h2, h3, h4, h5, h6, title, p, span').first().text();
// Try the first paragraph-like element
const match = html.match(titleRegex);
if (match) {
title = match[1];
}
// Fall back to newline splitting (i.e. if no paragraph elements)
title = title || html.split('\n').filter(Boolean).shift();

View File

@@ -108,13 +108,23 @@ describe('ActivityPub integration', () => {
});
});
describe('.generateTitle', () => {
describe.only('.generateTitle', () => {
it('should take the first paragraph element\'s text', () => {
const source = '<p>Lorem ipsum dolor sit amet</p><p>consectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.</p>';
const title = activitypub.helpers.generateTitle(source);
assert.strictEqual(title, 'Lorem ipsum dolor sit amet');
});
it('should also accept a couple other tags like h1 or span', () => {
let source = '<h1>Lorem ipsum dolor sit amet</h1><p>consectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.</p>';
let title = activitypub.helpers.generateTitle(source);
assert.strictEqual(title, 'Lorem ipsum dolor sit amet');
source = '<span>Lorem ipsum dolor sit amet</span><p>consectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.</p>';
title = activitypub.helpers.generateTitle(source);
assert.strictEqual(title, 'Lorem ipsum dolor sit amet');
});
it('should take the first line\'s text if no matched elements', () => {
const source = 'Lorem ipsum dolor sit amet\n\nconsectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.';
const title = activitypub.helpers.generateTitle(source);