about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: lambda <pleromagit@rogerbraun.net> 2019-01-20 10:24:05 +0000
committer: lambda <pleromagit@rogerbraun.net> 2019-01-20 10:24:05 +0000
commit: f3045a179e40bd8f670be588a8e93b8d05e95d27 (patch)
tree: dc3a665639d97c2bc274626fffb745e61b8be229
parent: 42612b1c8d356843b9e8785d3a91072f38fb50cf (diff)
parent: 5834b08fe77250d1dad0f2f6cd148f2fd8f85c09 (diff)
download: pleroma-f3045a179e40bd8f670be588a8e93b8d05e95d27.tar.gz
Merge branch 'i1t/pleroma-477_user_search_improvements' into 'develop'
I1t/pleroma 477 user search improvements

See merge request pleroma/pleroma!685
-rw-r--r-- lib/pleroma/user.ex 143
-rw-r--r-- lib/pleroma/web/mastodon_api/mastodon_api_controller.ex 6
-rw-r--r-- lib/pleroma/web/twitter_api/twitter_api_controller.ex 2
-rw-r--r-- priv/repo/migrations/20190115085500_create_user_fts_index.exs 17
-rw-r--r-- priv/repo/migrations/20190118074940_fix_user_trigram_index.exs 22
-rw-r--r-- test/user_test.exs 71
-rw-r--r-- test/web/twitter_api/twitter_api_controller_test.exs 10
7 files changed, 230 insertions, 41 deletions
diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex
index 1db1c53cb..06084b117 100644
--- a/lib/pleroma/user.ex
+++ b/lib/pleroma/user.ex
@@ -35,7 +35,7 @@ defmodule Pleroma.User do
field(:avatar, :map)
field(:local, :boolean, default: true)
field(:follower_address, :string)
- field(:search_distance, :float, virtual: true)
+ field(:search_rank, :float, virtual: true)
field(:tags, {:array, :string}, default: [])
field(:last_refreshed_at, :naive_datetime)
has_many(:notifications, Notification)
@@ -510,6 +510,12 @@ defmodule Pleroma.User do
{:ok, Repo.all(q)}
end
+ def get_followers_ids(user, page \\ nil) do
+ q = get_followers_query(user, page)
+
+ Repo.all(from(u in q, select: u.id))
+ end
+
def get_friends_query(%User{id: id, following: following}, nil) do
from(
u in User,
@@ -534,6 +540,12 @@ defmodule Pleroma.User do
{:ok, Repo.all(q)}
end
+ def get_friends_ids(user, page \\ nil) do
+ q = get_friends_query(user, page)
+
+ Repo.all(from(u in q, select: u.id))
+ end
+
def get_follow_requests_query(%User{} = user) do
from(
a in Activity,
@@ -665,37 +677,120 @@ defmodule Pleroma.User do
Repo.all(query)
end
- def search(query, resolve \\ false) do
- # strip the beginning @ off if there is a query
+ def search(query, resolve \\ false, for_user \\ nil) do
+ # Strip the beginning @ off if there is a query
query = String.trim_leading(query, "@")
- if resolve do
- User.get_or_fetch_by_nickname(query)
- end
+ if resolve, do: User.get_or_fetch_by_nickname(query)
- inner =
- from(
- u in User,
- select_merge: %{
- search_distance:
- fragment(
- "? <-> (? || coalesce(?, ''))",
- ^query,
- u.nickname,
- u.name
- )
- },
- where: not is_nil(u.nickname)
- )
+ fts_results = do_search(fts_search_subquery(query), for_user)
+
+ {:ok, trigram_results} =
+ Repo.transaction(fn ->
+ Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", [])
+ do_search(trigram_search_subquery(query), for_user)
+ end)
+
+ Enum.uniq_by(fts_results ++ trigram_results, & &1.id)
+ end
+ defp do_search(subquery, for_user, options \\ []) do
q =
from(
- s in subquery(inner),
- order_by: s.search_distance,
- limit: 20
+ s in subquery(subquery),
+ order_by: [desc: s.search_rank],
+ limit: ^(options[:limit] || 20)
)
- Repo.all(q)
+ results =
+ q
+ |> Repo.all()
+ |> Enum.filter(&(&1.search_rank > 0))
+
+ boost_search_results(results, for_user)
+ end
+
+ defp fts_search_subquery(query) do
+ processed_query =
+ query
+ |> String.replace(~r/\W+/, " ")
+ |> String.trim()
+ |> String.split()
+ |> Enum.map(&(&1 <> ":*"))
+ |> Enum.join(" | ")
+
+ from(
+ u in User,
+ select_merge: %{
+ search_rank:
+ fragment(
+ """
+ ts_rank_cd(
+ setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
+ setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'),
+ to_tsquery('simple', ?),
+ 32
+ )
+ """,
+ u.nickname,
+ u.name,
+ ^processed_query
+ )
+ },
+ where:
+ fragment(
+ """
+ (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
+ setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?)
+ """,
+ u.nickname,
+ u.name,
+ ^processed_query
+ )
+ )
+ end
+
+ defp trigram_search_subquery(query) do
+ from(
+ u in User,
+ select_merge: %{
+ search_rank:
+ fragment(
+ "similarity(?, trim(? || ' ' || coalesce(?, '')))",
+ ^query,
+ u.nickname,
+ u.name
+ )
+ },
+ where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^query)
+ )
+ end
+
+ defp boost_search_results(results, nil), do: results
+
+ defp boost_search_results(results, for_user) do
+ friends_ids = get_friends_ids(for_user)
+ followers_ids = get_followers_ids(for_user)
+
+ Enum.map(
+ results,
+ fn u ->
+ search_rank_coef =
+ cond do
+ u.id in friends_ids ->
+ 1.2
+
+ u.id in followers_ids ->
+ 1.1
+
+ true ->
+ 1
+ end
+
+ Map.put(u, :search_rank, u.search_rank * search_rank_coef)
+ end
+ )
+ |> Enum.sort_by(&(-&1.search_rank))
end
def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do
diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex
index daad89185..882d336be 100644
--- a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex
+++ b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex
@@ -771,7 +771,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do
end
def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
- accounts = User.search(query, params["resolve"] == "true")
+ accounts = User.search(query, params["resolve"] == "true", user)
statuses = status_search(user, query)
@@ -795,7 +795,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do
end
def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
- accounts = User.search(query, params["resolve"] == "true")
+ accounts = User.search(query, params["resolve"] == "true", user)
statuses = status_search(user, query)
@@ -816,7 +816,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do
end
def account_search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
- accounts = User.search(query, params["resolve"] == "true")
+ accounts = User.search(query, params["resolve"] == "true", user)
res = AccountView.render("accounts.json", users: accounts, for: user, as: :user)
diff --git a/lib/pleroma/web/twitter_api/twitter_api_controller.ex b/lib/pleroma/web/twitter_api/twitter_api_controller.ex
index 1c728166c..ede079963 100644
--- a/lib/pleroma/web/twitter_api/twitter_api_controller.ex
+++ b/lib/pleroma/web/twitter_api/twitter_api_controller.ex
@@ -675,7 +675,7 @@ defmodule Pleroma.Web.TwitterAPI.Controller do
end
def search_user(%{assigns: %{user: user}} = conn, %{"query" => query}) do
- users = User.search(query, true)
+ users = User.search(query, true, user)
conn
|> put_view(UserView)
diff --git a/priv/repo/migrations/20190115085500_create_user_fts_index.exs b/priv/repo/migrations/20190115085500_create_user_fts_index.exs
new file mode 100644
index 000000000..499d67113
--- /dev/null
+++ b/priv/repo/migrations/20190115085500_create_user_fts_index.exs
@@ -0,0 +1,17 @@
+defmodule Pleroma.Repo.Migrations.CreateUserFtsIndex do
+ use Ecto.Migration
+
+ def change do
+ create index(
+ :users,
+ [
+ """
+ (setweight(to_tsvector('simple', regexp_replace(nickname, '\\W', ' ', 'g')), 'A') ||
+ setweight(to_tsvector('simple', regexp_replace(coalesce(name, ''), '\\W', ' ', 'g')), 'B'))
+ """
+ ],
+ name: :users_fts_index,
+ using: :gin
+ )
+ end
+end
diff --git a/priv/repo/migrations/20190118074940_fix_user_trigram_index.exs b/priv/repo/migrations/20190118074940_fix_user_trigram_index.exs
new file mode 100644
index 000000000..b4e8c984c
--- /dev/null
+++ b/priv/repo/migrations/20190118074940_fix_user_trigram_index.exs
@@ -0,0 +1,22 @@
+defmodule Pleroma.Repo.Migrations.FixUserTrigramIndex do
+ use Ecto.Migration
+
+ def up do
+ drop_if_exists(index(:users, [], name: :users_trigram_index))
+
+ create(
+ index(:users, ["(trim(nickname || ' ' || coalesce(name, ''))) gist_trgm_ops"],
+ name: :users_trigram_index,
+ using: :gist
+ )
+ )
+ end
+
+ def down do
+ drop_if_exists(index(:users, [], name: :users_trigram_index))
+
+ create(
+ index(:users, ["(nickname || name) gist_trgm_ops"], name: :users_trigram_index, using: :gist)
+ )
+ end
+end
diff --git a/test/user_test.exs b/test/user_test.exs
index 21a62483f..092cfc5dc 100644
--- a/test/user_test.exs
+++ b/test/user_test.exs
@@ -775,14 +775,61 @@ defmodule Pleroma.UserTest do
end
describe "User.search" do
- test "finds a user, ranking by similarity" do
- _user = insert(:user, %{name: "lain"})
- _user_two = insert(:user, %{name: "ean"})
- _user_three = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"})
- user_four = insert(:user, %{nickname: "lain@pleroma.soykaf.com"})
+ test "finds a user by full or partial nickname" do
+ user = insert(:user, %{nickname: "john"})
- assert user_four ==
- User.search("lain@ple") |> List.first() |> Map.put(:search_distance, nil)
+ Enum.each(["john", "jo", "j"], fn query ->
+ assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil)
+ end)
+ end
+
+ test "finds a user by full or partial name" do
+ user = insert(:user, %{name: "John Doe"})
+
+ Enum.each(["John Doe", "JOHN", "doe", "j d", "j", "d"], fn query ->
+ assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil)
+ end)
+ end
+
+ test "finds users, preferring nickname matches over name matches" do
+ u1 = insert(:user, %{name: "lain", nickname: "nick1"})
+ u2 = insert(:user, %{nickname: "lain", name: "nick1"})
+
+ assert [u2.id, u1.id] == Enum.map(User.search("lain"), & &1.id)
+ end
+
+ test "finds users, considering density of matched tokens" do
+ u1 = insert(:user, %{name: "Bar Bar plus Word Word"})
+ u2 = insert(:user, %{name: "Word Word Bar Bar Bar"})
+
+ assert [u2.id, u1.id] == Enum.map(User.search("bar word"), & &1.id)
+ end
+
+ test "finds users, ranking by similarity" do
+ u1 = insert(:user, %{name: "lain"})
+ _u2 = insert(:user, %{name: "ean"})
+ u3 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"})
+ u4 = insert(:user, %{nickname: "lain@pleroma.soykaf.com"})
+
+ assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id)
+ end
+
+ test "finds users, handling misspelled requests" do
+ u1 = insert(:user, %{name: "lain"})
+
+ assert [u1.id] == Enum.map(User.search("laiin"), & &1.id)
+ end
+
+ test "finds users, boosting ranks of friends and followers" do
+ u1 = insert(:user)
+ u2 = insert(:user, %{name: "Doe"})
+ follower = insert(:user, %{name: "Doe"})
+ friend = insert(:user, %{name: "Doe"})
+
+ {:ok, follower} = User.follow(follower, u1)
+ {:ok, u1} = User.follow(u1, friend)
+
+ assert [friend.id, follower.id, u2.id] == Enum.map(User.search("doe", false, u1), & &1.id)
end
test "finds a user whose name is nil" do
@@ -792,7 +839,15 @@ defmodule Pleroma.UserTest do
assert user_two ==
User.search("lain@pleroma.soykaf.com")
|> List.first()
- |> Map.put(:search_distance, nil)
+ |> Map.put(:search_rank, nil)
+ end
+
+ test "does not yield false-positive matches" do
+ insert(:user, %{name: "John Doe"})
+
+ Enum.each(["mary", "a", ""], fn query ->
+ assert [] == User.search(query)
+ end)
end
end
diff --git a/test/web/twitter_api/twitter_api_controller_test.exs b/test/web/twitter_api/twitter_api_controller_test.exs
index e08edc525..f22cdd870 100644
--- a/test/web/twitter_api/twitter_api_controller_test.exs
+++ b/test/web/twitter_api/twitter_api_controller_test.exs
@@ -1655,16 +1655,16 @@ defmodule Pleroma.Web.TwitterAPI.ControllerTest do
describe "GET /api/pleroma/search_user" do
test "it returns users, ordered by similarity", %{conn: conn} do
user = insert(:user, %{name: "eal"})
- user_two = insert(:user, %{name: "ean"})
- user_three = insert(:user, %{name: "ebn"})
+ user_two = insert(:user, %{name: "eal me"})
+ _user_three = insert(:user, %{name: "zzz"})
resp =
conn
- |> get(twitter_api_search__path(conn, :search_user), query: "eal")
+ |> get(twitter_api_search__path(conn, :search_user), query: "eal me")
|> json_response(200)
- assert length(resp) == 3
- assert [user.id, user_two.id, user_three.id] == Enum.map(resp, fn %{"id" => id} -> id end)
+ assert length(resp) == 2
+ assert [user_two.id, user.id] == Enum.map(resp, fn %{"id" => id} -> id end)
end
end