feat: logic for remote user deletion, cronjob, and ACP options for pruning

re: #12611
This commit is contained in:
Julian Lam
2024-06-07 16:27:44 -04:00
parent 8d790964be
commit 4bb2c1a85e
6 changed files with 87 additions and 1 deletions

View File

@@ -191,5 +191,7 @@
"reconnectionDelay": 1500,
"disableCustomUserSkins": 0,
"activitypubEnabled": 1,
"activitypubAllowLoopback": 0
"activitypubAllowLoopback": 0,
"activitypubContentPruneDays": 30,
"activitypubUserPruneDays": 7
}

View File

@@ -2,6 +2,11 @@
"intro-lead": "What is Federation?",
"intro-body": "NodeBB is able to communicate with other NodeBB instances that support it. This is achieved through a protocol called <a href=\"https://activitypub.rocks/\">ActivityPub</a>. If enabled, NodeBB will also be able to communicate with other apps and websites that use ActivityPub (e.g. Mastodon, Peertube, etc.)",
"general": "General",
"pruning": "Content Pruning",
"content-pruning": "Days to keep remote content",
"content-pruning-help": "Note that remote content that has received engagement (a reply or an upvote/downvote) will be preserved. (0 for disabled)",
"user-pruning": "Days to cache remote user accounts",
"user-pruning-help": "Remote user accounts will only be pruned if they have no posts. Otherwise they will be re-retrieved. (0 for disabled)",
"enabled": "Enable Federation",
"enabled-help": "If enabled, this NodeBB will be able to communicate with all ActivityPub-enabled clients on the wider fediverse.",
"allowLoopback": "Allow loopback processing",

View File

@@ -1,8 +1,11 @@
'use strict';
const nconf = require('nconf');
const winston = require('winston');
const db = require('../database');
const meta = require('../meta');
const batch = require('../batch');
const user = require('../user');
const utils = require('../utils');
const TTLCache = require('../cache/ttl');
@@ -211,6 +214,11 @@ Actors.getLocalFollowersCount = async (id) => {
};
Actors.remove = async (id) => {
/**
* Remove ActivityPub related metadata pertaining to a remote id
*
* Note: don't call this directly! It is called as part of user.deleteAccount
*/
const exists = await db.isSortedSetMember('usersRemote:lastCrawled', id);
if (!exists) {
return false;
@@ -235,3 +243,50 @@ Actors.remove = async (id) => {
db.sortedSetRemove('usersRemote:lastCrawled', id),
]);
};
Actors.prune = async () => {
	/**
	 * Clear out remote user accounts that do not have content on the forum anywhere
	 * Re-crawl those that have not been updated recently
	 *
	 * Candidates are the members of `usersRemote:lastCrawled` whose score is older
	 * than `activitypubUserPruneDays` days. For each candidate:
	 *   - no `userRemote:<id>` key: the stale zset entry is removed
	 *   - no entries in `uid:<id>:posts`: the account is deleted via user.deleteAccount
	 *   - otherwise: the id is queued and re-asserted once all batches are done
	 *
	 * A setting of 0 (or any non-positive / non-numeric value) disables pruning.
	 */
	winston.verbose('[actors/prune] Started scheduled pruning of remote user accounts');

	const days = parseInt(meta.config.activitypubUserPruneDays, 10);
	if (!Number.isFinite(days) || days <= 0) {
		// Without this guard, 0 days yields a cutoff of "now", which would select
		// (and potentially delete) every remote user instead of disabling pruning.
		winston.verbose('[actors/prune] Remote user pruning is disabled, skipping.');
		return;
	}

	const timestamp = Date.now() - (1000 * 60 * 60 * 24 * days);
	const uids = await db.getSortedSetRangeByScore('usersRemote:lastCrawled', 0, -1, 0, timestamp);
	if (!uids.length) {
		winston.verbose('[actors/prune] No remote users to prune, all done.');
		return;
	}

	winston.verbose(`[actors/prune] Found ${uids.length} remote users last crawled more than ${days} days ago`);
	let deletionCount = 0;
	const reassertionSet = new Set();

	await batch.processArray(uids, async (batchUids) => {
		// The two lookups are independent of each other, so run them together
		const [exists, counts] = await Promise.all([
			db.exists(batchUids.map(uid => `userRemote:${uid}`)),
			db.sortedSetsCard(batchUids.map(uid => `uid:${uid}:posts`)),
		]);

		await Promise.all(batchUids.map(async (uid, idx) => {
			if (!exists[idx]) {
				// id in zset but not asserted, handle and return early
				await db.sortedSetRemove('usersRemote:lastCrawled', uid);
				return;
			}

			if (counts[idx] < 1) {
				try {
					await user.deleteAccount(uid);
					deletionCount += 1;
				} catch (err) {
					// One account failing to delete must not abort the whole pruning run
					winston.warn(`[actors/prune] Unable to delete remote user ${uid}: ${err.message}`);
				}
			} else {
				reassertionSet.add(uid);
			}
		}));
	}, {
		batch: 50,
		interval: 1000,
	});

	winston.verbose(`[actors/prune] ${deletionCount} remote users pruned, re-asserting ${reassertionSet.size} remote users.`);
	await Actors.assert(Array.from(reassertionSet), { update: true });
};

View File

@@ -45,6 +45,7 @@ ActivityPub.actors = require('./actors');
ActivityPub.startJobs = () => {
	/**
	 * Registers the daily ActivityPub maintenance jobs:
	 *   - 00:00: prune remote notes
	 *   - 01:00: prune remote actors
	 */
	// Jobs are started immediately (4th argument); flip the last argument to true when debugging
	const scheduleDaily = (cronTime, onTick) => new CronJob(cronTime, onTick, null, true, null, null, false);

	scheduleDaily('0 0 * * *', ActivityPub.notes.prune);
	scheduleDaily('0 1 * * *', ActivityPub.actors.prune);
};
ActivityPub.resolveId = async (uid, id) => {

View File

@@ -95,6 +95,7 @@ module.exports = function (User) {
const userData = await db.getObject(utils.isNumber(uid) ? `user:${uid}` : `userRemote:${uid}`);
if (!userData || !userData.username) {
console.log('ERRORING', uid, userData);
delete deletesInProgress[uid];
throw new Error('[[error:no-user]]');
}

View File

@@ -24,6 +24,28 @@
</div>
</div>
<!-- Retention settings for remote content and cached remote user accounts; 0 disables pruning, so negative values are not accepted -->
<div class="row settings m-0">
	<div class="col-sm-2 col-12 settings-header">[[admin/settings/activitypub:pruning]]</div>
	<div class="col-sm-10 col-12">
		<form>
			<div class="mb-3">
				<label class="form-label" for="activitypubContentPruneDays">[[admin/settings/activitypub:content-pruning]]</label>
				<input type="number" min="0" id="activitypubContentPruneDays" name="activitypubContentPruneDays" data-field="activitypubContentPruneDays" title="[[admin/settings/activitypub:content-pruning]]" class="form-control" />
				<div class="form-text">
					[[admin/settings/activitypub:content-pruning-help]]
				</div>
			</div>
			<div class="mb-3">
				<label class="form-label" for="activitypubUserPruneDays">[[admin/settings/activitypub:user-pruning]]</label>
				<input type="number" min="0" id="activitypubUserPruneDays" name="activitypubUserPruneDays" data-field="activitypubUserPruneDays" title="[[admin/settings/activitypub:user-pruning]]" class="form-control" />
				<div class="form-text">
					[[admin/settings/activitypub:user-pruning-help]]
				</div>
			</div>
		</form>
	</div>
</div>
<div class="row settings m-0">
<div class="col-sm-2 col-12 settings-header">[[admin/settings/activitypub:server-filtering]]</div>
<div class="col-sm-10 col-12">