about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- config/config.exs 2
-rw-r--r-- config/description.exs 1
-rw-r--r-- lib/pleroma/migrators/hashtags_table_migrator.ex 1
-rw-r--r-- lib/pleroma/object.ex 1
-rw-r--r-- lib/pleroma/workers/cron/hashtags_cleanup_worker.ex 57
5 files changed, 62 insertions, 0 deletions
diff --git a/config/config.exs b/config/config.exs
index c4a690799..dfd2fc434 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -553,10 +553,12 @@ config :pleroma, Oban,
remote_fetcher: 2,
attachments_cleanup: 1,
new_users_digest: 1,
+ hashtags_cleanup: 1,
mute_expire: 5
],
plugins: [Oban.Plugins.Pruner],
crontab: [
+ {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
]
diff --git a/config/description.exs b/config/description.exs
index 46f085c70..147c1930c 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -1943,6 +1943,7 @@ config :pleroma, :config_description, [
type: {:list, :tuple},
description: "Settings for cron background jobs",
suggestions: [
+ {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
]
diff --git a/lib/pleroma/migrators/hashtags_table_migrator.ex b/lib/pleroma/migrators/hashtags_table_migrator.ex
index 6a1c9592c..07b42a7f4 100644
--- a/lib/pleroma/migrators/hashtags_table_migrator.ex
+++ b/lib/pleroma/migrators/hashtags_table_migrator.ex
@@ -152,6 +152,7 @@ defmodule Pleroma.Migrators.HashtagsTableMigrator do
defp query do
# Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
+ # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
from(
object in Object,
where:
diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex
index 9edf43e04..52b77e41c 100644
--- a/lib/pleroma/object.ex
+++ b/lib/pleroma/object.ex
@@ -65,6 +65,7 @@ defmodule Pleroma.Object do
|> maybe_handle_hashtags_change(struct)
end
+ # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
defp maybe_handle_hashtags_change(changeset, struct) do
with data_hashtags_change = get_change(changeset, :data),
true <- hashtags_changed?(struct, data_hashtags_change),
diff --git a/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex
new file mode 100644
index 000000000..b319067ca
--- /dev/null
+++ b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex
@@ -0,0 +1,57 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do
+ @moduledoc """
+ Cron worker that deletes unused `hashtags_objects` records and then unused `hashtags` records.
+ """
+
+ use Oban.Worker, queue: "hashtags_cleanup"
+
+ alias Pleroma.Repo
+
+ require Logger
+ # Deletes hashtags_objects rows whose object is not referenced by any Create activity.
+ @hashtags_objects_query """
+ DELETE FROM hashtags_objects WHERE object_id IN
+ (SELECT DISTINCT objects.id FROM objects
+ JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
+ ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
+ (objects.data->>'id')
+ AND activities.data->>'type' = 'Create'
+ WHERE activities.id IS NULL);
+ """
+ # Deletes hashtags that have no hashtags_objects rows and were inserted before the $1 timestamp.
+ @hashtags_query """
+ DELETE FROM hashtags WHERE id IN
+ (SELECT hashtags.id FROM hashtags
+ LEFT OUTER JOIN hashtags_objects
+ ON hashtags_objects.hashtag_id = hashtags.id
+ WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1);
+ """
+
+ @impl Oban.Worker
+ def perform(_job) do
+ Logger.info("Cleaning up unused `hashtags_objects` records...")
+
+ {:ok, %{num_rows: hashtags_objects_count}} =
+ Repo.query(@hashtags_objects_query, [], timeout: :infinity)
+
+ Logger.info("Deleted #{hashtags_objects_count} unused `hashtags_objects` records.")
+
+ Logger.info("Cleaning up unused `hashtags` records...")
+
+ # Note: hashtags inserted within the last 24 hours are skipped, since references are added after the hashtag is created
+ {:ok, %{num_rows: hashtags_count}} =
+ Repo.query(@hashtags_query, [NaiveDateTime.add(NaiveDateTime.utc_now(), -3600 * 24)],
+ timeout: :infinity
+ )
+
+ Logger.info("Deleted #{hashtags_count} unused `hashtags` records.")
+
+ Logger.info("HashtagsCleanupWorker complete.")
+
+ :ok
+ end
+end