diff options
author | kaniini <nenolod@gmail.com> | 2019-06-05 15:51:32 +0000 |
---|---|---|
committer | kaniini <nenolod@gmail.com> | 2019-06-05 15:51:32 +0000 |
commit | a511d2f9003870d267c143ee1aa12a0dfd805f8e (patch) | |
tree | 581f3e97a6fd06e3b4fa1f1d20adace9fb8d08db /lib | |
parent | 50afe5ab9e384676a7e64b476f3655ccf94acb5d (diff) | |
parent | 1cb245c9825febb0b83cfc361f78d132cb5d05a8 (diff) | |
download | pleroma-a511d2f9003870d267c143ee1aa12a0dfd805f8e.tar.gz |
Merge branch 'improve-search' into 'develop'
[#943] Contain search for unauthenticated users
See merge request pleroma/pleroma!1220
Diffstat (limited to 'lib')
-rw-r--r-- | lib/mix/tasks/benchmark.ex | 2 | ||||
-rw-r--r-- | lib/pleroma/activity.ex | 2 | ||||
-rw-r--r-- | lib/pleroma/activity/search.ex | 75 | ||||
-rw-r--r-- | lib/pleroma/user.ex | 118 | ||||
-rw-r--r-- | lib/pleroma/user/search.ex | 145 | ||||
-rw-r--r-- | lib/pleroma/web/mastodon_api/mastodon_api_controller.ex | 61 |
6 files changed, 227 insertions, 176 deletions
diff --git a/lib/mix/tasks/benchmark.ex b/lib/mix/tasks/benchmark.ex index 0fbb4dbb1..e4b1a638a 100644 --- a/lib/mix/tasks/benchmark.ex +++ b/lib/mix/tasks/benchmark.ex @@ -7,7 +7,7 @@ defmodule Mix.Tasks.Pleroma.Benchmark do Benchee.run(%{ "search" => fn -> - Pleroma.Web.MastodonAPI.MastodonAPIController.status_search(nil, "cofe") + Pleroma.Activity.search(nil, "cofe") end }) end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 99589590c..6db41fe6e 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -343,4 +343,6 @@ defmodule Pleroma.Activity do ) ) end + + defdelegate search(user, query), to: Pleroma.Activity.Search end diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex new file mode 100644 index 000000000..f2fdfffe1 --- /dev/null +++ b/lib/pleroma/activity/search.ex @@ -0,0 +1,75 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Activity.Search do + alias Pleroma.Activity + alias Pleroma.Object.Fetcher + alias Pleroma.Repo + alias Pleroma.User + alias Pleroma.Web.ActivityPub.Visibility + + import Ecto.Query + + def search(user, search_query) do + index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + + Activity + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> restrict_public() + |> query_with(index_type, search_query) + |> maybe_restrict_local(user) + |> Repo.all() + |> maybe_fetch(user, search_query) + end + + defp restrict_public(q) do + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, + limit: 40 + ) + end + + defp query_with(q, :gin, search_query) do + from([a, o] in q, + where: + fragment( + "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", + o.data, + ^search_query + ), + order_by: [desc: :id] + ) + end + + defp query_with(q, :rum, search_query) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery('english', ?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + + # users can search everything + defp maybe_restrict_local(q, %User{}), do: q + + # unauthenticated users can only search local activities + defp maybe_restrict_local(q, _), do: where(q, local: true) + + defp maybe_fetch(activities, user, search_query) do + with true <- Regex.match?(~r/https?:/, search_query), + {:ok, object} <- Fetcher.fetch_object_from_id(search_query), + %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), + true <- Visibility.visible_for_user?(activity, user) do + activities ++ [activity] + else + _ -> activities + end + end +end diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index d873d7623..d9f7e14b0 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -735,122 +735,6 @@ defmodule Pleroma.User do |> Repo.all() end - def search(query, resolve \\ false, for_user \\ nil) do - # Strip the beginning @ off if there is a query - query = String.trim_leading(query, "@") - - if resolve, do: get_or_fetch(query) - - {:ok, results} = - Repo.transaction(fn -> - Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) - Repo.all(search_query(query, for_user)) - end) - - results - end - - def search_query(query, for_user) do - fts_subquery = fts_search_subquery(query) - trigram_subquery = trigram_search_subquery(query) - union_query = from(s in trigram_subquery, union_all: ^fts_subquery) - distinct_query = from(s in subquery(union_query), order_by: s.search_type, distinct: s.id) - - from(s in subquery(boost_search_rank_query(distinct_query, for_user)), - order_by: [desc: s.search_rank], - limit: 40 - ) - end - - defp boost_search_rank_query(query, nil), do: query - - defp boost_search_rank_query(query, for_user) do - friends_ids = get_friends_ids(for_user) - followers_ids = get_followers_ids(for_user) - - from(u in subquery(query), - select_merge: %{ - search_rank: - fragment( - """ - CASE WHEN (?) THEN (?) * 1.3 - WHEN (?) THEN (?) * 1.2 - WHEN (?) THEN (?) * 1.1 - ELSE (?) END - """, - u.id in ^friends_ids and u.id in ^followers_ids, - u.search_rank, - u.id in ^friends_ids, - u.search_rank, - u.id in ^followers_ids, - u.search_rank, - u.search_rank - ) - } - ) - end - - defp fts_search_subquery(term, query \\ User) do - processed_query = - term - |> String.replace(~r/\W+/, " ") - |> String.trim() - |> String.split() - |> Enum.map(&(&1 <> ":*")) - |> Enum.join(" | ") - - from( - u in query, - select_merge: %{ - search_type: ^0, - search_rank: - fragment( - """ - ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?), - 32 - ) - """, - u.nickname, - u.name, - ^processed_query - ) - }, - where: - fragment( - """ - (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) - """, - u.nickname, - u.name, - ^processed_query - ) - ) - |> restrict_deactivated() - end - - defp trigram_search_subquery(term) do - from( - u in User, - select_merge: %{ - # ^1 gives 'Postgrex expected a binary, got 1' for some weird reason - search_type: fragment("?", 1), - search_rank: - fragment( - "similarity(?, trim(? || ' ' || coalesce(?, '')))", - ^term, - u.nickname, - u.name - ) - }, - where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^term) - ) - |> restrict_deactivated() - end - def mute(muter, %User{ap_id: ap_id}) do info_cng = muter.info @@ -1449,4 +1333,6 @@ defmodule Pleroma.User do ) |> Repo.all() end + + defdelegate search(query, opts \\ []), to: User.Search end diff --git a/lib/pleroma/user/search.ex b/lib/pleroma/user/search.ex new file mode 100644 index 000000000..d5b2eaa9f --- /dev/null +++ b/lib/pleroma/user/search.ex @@ -0,0 +1,145 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.User.Search do + alias Pleroma.Repo + alias Pleroma.User + import Ecto.Query + + def search(query, opts \\ []) do + resolve = Keyword.get(opts, :resolve, false) + for_user = Keyword.get(opts, :for_user) + + # Strip the beginning @ off if there is a query + query = String.trim_leading(query, "@") + + if match?(%User{}, for_user) and resolve, do: User.get_or_fetch(query) + + {:ok, results} = + Repo.transaction(fn -> + Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) + + query + |> search_query(for_user) + |> Repo.all() + end) + + results + end + + defp search_query(query, for_user) do + query + |> union_query() + |> distinct_query() + |> boost_search_rank_query(for_user) + |> subquery() + |> order_by(desc: :search_rank) + |> limit(20) + |> maybe_restrict_local(for_user) + end + + defp union_query(query) do + fts_subquery = fts_search_subquery(query) + trigram_subquery = trigram_search_subquery(query) + + from(s in trigram_subquery, union_all: ^fts_subquery) + end + + defp distinct_query(q) do + from(s in subquery(q), order_by: s.search_type, distinct: s.id) + end + + # unauthenticated users can only search local activities + defp maybe_restrict_local(q, %User{}), do: q + defp maybe_restrict_local(q, _), do: where(q, [u], u.local == true) + + defp boost_search_rank_query(query, nil), do: query + + defp boost_search_rank_query(query, for_user) do + friends_ids = User.get_friends_ids(for_user) + followers_ids = User.get_followers_ids(for_user) + + from(u in subquery(query), + select_merge: %{ + search_rank: + fragment( + """ + CASE WHEN (?) THEN (?) * 1.3 + WHEN (?) THEN (?) * 1.2 + WHEN (?) THEN (?) * 1.1 + ELSE (?) END + """, + u.id in ^friends_ids and u.id in ^followers_ids, + u.search_rank, + u.id in ^friends_ids, + u.search_rank, + u.id in ^followers_ids, + u.search_rank, + u.search_rank + ) + } + ) + end + + defp fts_search_subquery(term, query \\ User) do + processed_query = + term + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> Enum.join(" | ") + + from( + u in query, + select_merge: %{ + search_type: ^0, + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 + ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: + fragment( + """ + (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) + """, + u.nickname, + u.name, + ^processed_query + ) + ) + |> User.restrict_deactivated() + end + + defp trigram_search_subquery(term) do + from( + u in User, + select_merge: %{ + # ^1 gives 'Postgrex expected a binary, got 1' for some weird reason + search_type: fragment("?", 1), + search_rank: + fragment( + "similarity(?, trim(? || ' ' || coalesce(?, '')))", + ^term, + u.nickname, + u.name + ) + }, + where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^term) + ) + |> User.restrict_deactivated() + end +end diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex index d825555c6..92cd77f62 100644 --- a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex +++ b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex @@ -14,7 +14,6 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do alias Pleroma.HTTP alias Pleroma.Notification alias Pleroma.Object - alias Pleroma.Object.Fetcher alias Pleroma.Pagination alias Pleroma.Repo alias Pleroma.ScheduledActivity @@ -1125,64 +1124,9 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end end - def status_search_query_with_gin(q, query) do - from([a, o] in q, - where: - fragment( - "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", - o.data, - ^query - ), - order_by: [desc: :id] - ) - end - - def status_search_query_with_rum(q, query) do - from([a, o] in q, - where: - fragment( - "? @@ plainto_tsquery('english', ?)", - o.fts_content, - ^query - ), - order_by: [fragment("? <=> now()::date", o.inserted_at)] - ) - end - - def status_search(user, query) do - fetched = - if Regex.match?(~r/https?:/, query) do - with {:ok, object} <- Fetcher.fetch_object_from_id(query), - %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), - true <- Visibility.visible_for_user?(activity, user) do - [activity] - else - _e -> [] - end - end || [] - - q = - from([a, o] in Activity.with_preloaded_object(Activity), - where: fragment("?->>'type' = 'Create'", a.data), - where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, - limit: 40 - ) - - q = - if Pleroma.Config.get([:database, :rum_enabled]) do - status_search_query_with_rum(q, query) - else - status_search_query_with_gin(q, query) - end - - Repo.all(q) ++ fetched - end - def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do accounts = User.search(query, resolve: params["resolve"] == "true", for_user: user) - - statuses = status_search(user, query) - + statuses = Activity.search(user, query) tags_path = Web.base_url() <> "/tag/" tags = @@ -1205,8 +1149,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do accounts = User.search(query, resolve: params["resolve"] == "true", for_user: user) - - statuses = status_search(user, query) + statuses = Activity.search(user, query) tags = query |