introduce storage cleaner

Once this is getting hooked up, it'll periodically delete old
messages.

The StoragePolicy can be chosen by the user, currently there's
two versions, delete everything based on age is the obvious.

The other is for the data hoarders among us. It'll only delete
message types which can be considered low value... Types with
a time aspect like away / back... joins / parts etc.

It tries to do that in a sensible way, so that we don't block
all other db writers that are ongoing.
The "periodically" interval is by design not exposed to the user.
This commit is contained in:
Reto Brunner 2023-11-04 17:23:51 +01:00
parent 14d9ff247d
commit 74aff7ee5a
4 changed files with 176 additions and 0 deletions

View file

@ -11,6 +11,7 @@
"../server/log.ts",
"../server/config.ts",
"../server/client.ts",
"../server/storageCleaner.ts",
"../server/clientManager.ts",
"../server/identification.ts",
"../server/plugins/changelog.ts",

View file

@ -304,6 +304,26 @@ module.exports = {
// This value is set to `["sqlite", "text"]` by default.
messageStorage: ["sqlite", "text"],
// ### `storagePolicy`
// When the sqlite storage is in use, control the maximum storage duration.
// A background task will periodically clean up messages older than the limit.
// The available keys for the `storagePolicy` object are:
//
// - `enabled`: If this is false, the cleaning task is not running.
// - `maxAgeDays`: Maximum age of an entry in days.
// - `deletionPolicy`: Controls what types of messages are being deleted.
// Valid options are:
// - `statusOnly`: Only delete message types which are status related (e.g. away, back, join, parts, mode, ctcp...)
// but keep actual messages from nicks. This keeps the DB size down while retaining "precious" messages.
// - `everything`: Delete everything, including messages from irc nicks
storagePolicy: {
enabled: false,
maxAgeDays: 7,
deletionPolicy: "statusOnly",
},
// ### `useHexIp`
//
// When set to `true`, users' IP addresses will be encoded as hex.

View file

@ -76,6 +76,12 @@ type Debug = {
raw: boolean;
};
type StoragePolicy = {
enabled: boolean;
maxAgeDays: number;
deletionPolicy: "statusOnly" | "everything";
};
export type ConfigType = {
public: boolean;
host: string | undefined;
@ -97,6 +103,7 @@ export type ConfigType = {
defaults: Defaults;
lockNetwork: boolean;
messageStorage: string[];
storagePolicy: StoragePolicy;
useHexIp: boolean;
webirc?: WebIRC;
identd: Identd;

148
server/storageCleaner.ts Normal file
View file

@ -0,0 +1,148 @@
import SqliteMessageStorage from "./plugins/messageStorage/sqlite";
import {MessageType} from "./models/msg";
import Config from "./config";
import {DeletionRequest} from "./plugins/messageStorage/types";
import log from "./log";
const status_types = [
MessageType.AWAY,
MessageType.BACK,
MessageType.INVITE,
MessageType.JOIN,
MessageType.KICK,
MessageType.MODE,
MessageType.MODE_CHANNEL,
MessageType.MODE_USER,
MessageType.NICK,
MessageType.PART,
MessageType.QUIT,
MessageType.CTCP, // not technically a status, but generally those are only of interest temporarily
MessageType.CTCP_REQUEST,
MessageType.CHGHOST,
MessageType.TOPIC,
MessageType.TOPIC_SET_BY,
];
export class StorageCleaner {
db: SqliteMessageStorage;
olderThanDays: number;
messageTypes: MessageType[] | null;
limit: number;
ticker?: ReturnType<typeof setTimeout>;
errCount: number;
isStopped: boolean;
constructor(db: SqliteMessageStorage) {
this.errCount = 0;
this.isStopped = true;
this.db = db;
this.limit = 200;
const policy = Config.values.storagePolicy;
this.olderThanDays = policy.maxAgeDays;
switch (policy.deletionPolicy) {
case "statusOnly":
this.messageTypes = status_types;
break;
case "everything":
this.messageTypes = null;
break;
default:
// exhaustive switch guard, blows up when user specifies a invalid policy enum
this.messageTypes = assertNoBadPolicy(policy.deletionPolicy);
}
}
private genDeletionRequest(): DeletionRequest {
return {
limit: this.limit,
messageTypes: this.messageTypes,
olderThanDays: this.olderThanDays,
};
}
async runDeletesNoLimit(): Promise<number> {
if (!Config.values.storagePolicy.enabled) {
// this is meant to be used by cli tools, so we guard against this
throw new Error("storage policy is disabled");
}
const req = this.genDeletionRequest();
req.limit = -1; // unlimited
const num_deleted = await this.db.deleteMessages(req);
return num_deleted;
}
private async runDeletes() {
if (this.isStopped) {
return;
}
if (!this.db.isEnabled) {
// TODO: remove this once the server is intelligent enough to wait for init
this.schedule(30 * 1000);
return;
}
const req = this.genDeletionRequest();
let num_deleted = 0;
try {
num_deleted = await this.db.deleteMessages(req);
this.errCount = 0; // reset when it works
} catch (err: any) {
this.errCount++;
log.error("can't clean messages", err.message);
if (this.errCount === 2) {
log.error("Cleaning failed too many times, will not retry");
this.stop();
return;
}
}
// need to recheck here as the field may have changed since the await
if (this.isStopped) {
return;
}
if (num_deleted < req.limit) {
this.schedule(5 * 60 * 1000);
} else {
this.schedule(5000); // give others a chance to execute queries
}
}
private schedule(ms: number) {
const self = this;
this.ticker = setTimeout(() => {
self.runDeletes().catch((err) => {
log.error("storageCleaner: unexpected failure");
throw err;
});
}, ms);
}
start() {
this.isStopped = false;
this.schedule(0);
}
stop() {
this.isStopped = true;
if (!this.ticker) {
return;
}
clearTimeout(this.ticker);
}
}
function assertNoBadPolicy(_: never): never {
throw new Error(
`Invalid deletion policy "${Config.values.storagePolicy.deletionPolicy}" in the \`storagePolicy\` object, fix your config.`
);
}