diff options
Diffstat (limited to 'lib/pleroma/gun')
-rw-r--r-- | lib/pleroma/gun/api.ex | 3 | ||||
-rw-r--r-- | lib/pleroma/gun/conn.ex | 85 | ||||
-rw-r--r-- | lib/pleroma/gun/connection_pool.ex | 79 | ||||
-rw-r--r-- | lib/pleroma/gun/connection_pool/reclaimer.ex | 85 | ||||
-rw-r--r-- | lib/pleroma/gun/connection_pool/worker.ex | 127 | ||||
-rw-r--r-- | lib/pleroma/gun/connection_pool/worker_supervisor.ex | 45 |
6 files changed, 349 insertions, 75 deletions
diff --git a/lib/pleroma/gun/api.ex b/lib/pleroma/gun/api.ex index f51cd7db8..09be74392 100644 --- a/lib/pleroma/gun/api.ex +++ b/lib/pleroma/gun/api.ex @@ -19,7 +19,8 @@ defmodule Pleroma.Gun.API do :tls_opts, :tcp_opts, :socks_opts, - :ws_opts + :ws_opts, + :supervise ] @impl Gun diff --git a/lib/pleroma/gun/conn.ex b/lib/pleroma/gun/conn.ex index cd25a2e74..a3f75a4bb 100644 --- a/lib/pleroma/gun/conn.ex +++ b/lib/pleroma/gun/conn.ex @@ -3,85 +3,33 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Gun.Conn do - @moduledoc """ - Struct for gun connection data - """ alias Pleroma.Gun - alias Pleroma.Pool.Connections require Logger - @type gun_state :: :up | :down - @type conn_state :: :active | :idle - - @type t :: %__MODULE__{ - conn: pid(), - gun_state: gun_state(), - conn_state: conn_state(), - used_by: [pid()], - last_reference: pos_integer(), - crf: float(), - retries: pos_integer() - } - - defstruct conn: nil, - gun_state: :open, - conn_state: :init, - used_by: [], - last_reference: 0, - crf: 1, - retries: 0 - - @spec open(String.t() | URI.t(), atom(), keyword()) :: :ok | nil - def open(url, name, opts \\ []) - def open(url, name, opts) when is_binary(url), do: open(URI.parse(url), name, opts) - - def open(%URI{} = uri, name, opts) do + def open(%URI{} = uri, opts) do pool_opts = Pleroma.Config.get([:connections_pool], []) opts = opts |> Enum.into(%{}) - |> Map.put_new(:retry, pool_opts[:retry] || 1) - |> Map.put_new(:retry_timeout, pool_opts[:retry_timeout] || 1000) |> Map.put_new(:await_up_timeout, pool_opts[:await_up_timeout] || 5_000) + |> Map.put_new(:supervise, false) |> maybe_add_tls_opts(uri) - key = "#{uri.scheme}:#{uri.host}:#{uri.port}" - - max_connections = pool_opts[:max_connections] || 250 - - conn_pid = - if Connections.count(name) < max_connections do - do_open(uri, opts) - else - close_least_used_and_do_open(name, uri, opts) - end - - if is_pid(conn_pid) do - conn = %Pleroma.Gun.Conn{ - conn: conn_pid, - gun_state: :up, - conn_state: :active, - last_reference: :os.system_time(:second) - } - - :ok = Gun.set_owner(conn_pid, Process.whereis(name)) - Connections.add_conn(name, key, conn) - end + do_open(uri, opts) end defp maybe_add_tls_opts(opts, %URI{scheme: "http"}), do: opts - defp maybe_add_tls_opts(opts, %URI{scheme: "https", host: host}) do + defp maybe_add_tls_opts(opts, %URI{scheme: "https"}) do tls_opts = [ verify: :verify_peer, cacertfile: CAStore.file_path(), depth: 20, reuse_sessions: false, - verify_fun: - {&:ssl_verify_hostname.verify_fun/3, - [check_hostname: Pleroma.HTTP.Connection.format_host(host)]} + log_level: :warning, + customize_hostname_check: [match_fun: :public_key.pkix_verify_hostname_match_fun(:https)] ] tls_opts = @@ -105,7 +53,7 @@ defmodule Pleroma.Gun.Conn do {:ok, _} <- Gun.await_up(conn, opts[:await_up_timeout]), stream <- Gun.connect(conn, connect_opts), {:response, :fin, 200, _} <- Gun.await(conn, stream) do - conn + {:ok, conn} else error -> Logger.warn( @@ -141,7 +89,7 @@ defmodule Pleroma.Gun.Conn do with {:ok, conn} <- Gun.open(proxy_host, proxy_port, opts), {:ok, _} <- Gun.await_up(conn, opts[:await_up_timeout]) do - conn + {:ok, conn} else error -> Logger.warn( @@ -155,11 +103,11 @@ defmodule Pleroma.Gun.Conn do end defp do_open(%URI{host: host, port: port} = uri, opts) do - host = Pleroma.HTTP.Connection.parse_host(host) + host = Pleroma.HTTP.AdapterHelper.parse_host(host) with {:ok, conn} <- Gun.open(host, port, opts), {:ok, _} <- Gun.await_up(conn, opts[:await_up_timeout]) do - conn + {:ok, conn} else error -> Logger.warn( @@ -171,7 +119,7 @@ defmodule Pleroma.Gun.Conn do end defp destination_opts(%URI{host: host, port: port}) do - host = Pleroma.HTTP.Connection.parse_host(host) + host = Pleroma.HTTP.AdapterHelper.parse_host(host) %{host: host, port: port} end @@ -181,17 +129,6 @@ defmodule Pleroma.Gun.Conn do defp add_http2_opts(opts, _, _), do: opts - defp close_least_used_and_do_open(name, uri, opts) do - with [{key, conn} | _conns] <- Connections.get_unused_conns(name), - :ok <- Gun.close(conn.conn) do - Connections.remove_conn(name, key) - - do_open(uri, opts) - else - [] -> {:error, :pool_overflowed} - end - end - def compose_uri_log(%URI{scheme: scheme, host: host, path: path}) do "#{scheme}://#{host}#{path}" end diff --git a/lib/pleroma/gun/connection_pool.ex b/lib/pleroma/gun/connection_pool.ex new file mode 100644 index 000000000..8b41a668c --- /dev/null +++ b/lib/pleroma/gun/connection_pool.ex @@ -0,0 +1,79 @@ +defmodule Pleroma.Gun.ConnectionPool do + @registry __MODULE__ + + alias Pleroma.Gun.ConnectionPool.WorkerSupervisor + + def children do + [ + {Registry, keys: :unique, name: @registry}, + Pleroma.Gun.ConnectionPool.WorkerSupervisor + ] + end + + def get_conn(uri, opts) do + key = "#{uri.scheme}:#{uri.host}:#{uri.port}" + + case Registry.lookup(@registry, key) do + # The key has already been registered, but connection is not up yet + [{worker_pid, nil}] -> + get_gun_pid_from_worker(worker_pid, true) + + [{worker_pid, {gun_pid, _used_by, _crf, _last_reference}}] -> + GenServer.cast(worker_pid, {:add_client, self(), false}) + {:ok, gun_pid} + + [] -> + # :gun.set_owner fails in :connected state for whatevever reason, + # so we open the connection in the process directly and send it's pid back + # We trust gun to handle timeouts by itself + case WorkerSupervisor.start_worker([key, uri, opts, self()]) do + {:ok, worker_pid} -> + get_gun_pid_from_worker(worker_pid, false) + + {:error, {:already_started, worker_pid}} -> + get_gun_pid_from_worker(worker_pid, true) + + err -> + err + end + end + end + + defp get_gun_pid_from_worker(worker_pid, register) do + # GenServer.call will block the process for timeout length if + # the server crashes on startup (which will happen if gun fails to connect) + # so instead we use cast + monitor + + ref = Process.monitor(worker_pid) + if register, do: GenServer.cast(worker_pid, {:add_client, self(), true}) + + receive do + {:conn_pid, pid} -> + Process.demonitor(ref) + {:ok, pid} + + {:DOWN, ^ref, :process, ^worker_pid, reason} -> + case reason do + {:shutdown, error} -> error + _ -> {:error, reason} + end + end + end + + def release_conn(conn_pid) do + # :ets.fun2ms(fn {_, {worker_pid, {gun_pid, _, _, _}}} when gun_pid == conn_pid -> + # worker_pid end) + query_result = + Registry.select(@registry, [ + {{:_, :"$1", {:"$2", :_, :_, :_}}, [{:==, :"$2", conn_pid}], [:"$1"]} + ]) + + case query_result do + [worker_pid] -> + GenServer.cast(worker_pid, {:remove_client, self()}) + + [] -> + :ok + end + end +end diff --git a/lib/pleroma/gun/connection_pool/reclaimer.ex b/lib/pleroma/gun/connection_pool/reclaimer.ex new file mode 100644 index 000000000..cea800882 --- /dev/null +++ b/lib/pleroma/gun/connection_pool/reclaimer.ex @@ -0,0 +1,85 @@ +defmodule Pleroma.Gun.ConnectionPool.Reclaimer do + use GenServer, restart: :temporary + + @registry Pleroma.Gun.ConnectionPool + + def start_monitor do + pid = + case :gen_server.start(__MODULE__, [], name: {:via, Registry, {@registry, "reclaimer"}}) do + {:ok, pid} -> + pid + + {:error, {:already_registered, pid}} -> + pid + end + + {pid, Process.monitor(pid)} + end + + @impl true + def init(_) do + {:ok, nil, {:continue, :reclaim}} + end + + @impl true + def handle_continue(:reclaim, _) do + max_connections = Pleroma.Config.get([:connections_pool, :max_connections]) + + reclaim_max = + [:connections_pool, :reclaim_multiplier] + |> Pleroma.Config.get() + |> Kernel.*(max_connections) + |> round + |> max(1) + + :telemetry.execute([:pleroma, :connection_pool, :reclaim, :start], %{}, %{ + max_connections: max_connections, + reclaim_max: reclaim_max + }) + + # :ets.fun2ms( + # fn {_, {worker_pid, {_, used_by, crf, last_reference}}} when used_by == [] -> + # {worker_pid, crf, last_reference} end) + unused_conns = + Registry.select( + @registry, + [ + {{:_, :"$1", {:_, :"$2", :"$3", :"$4"}}, [{:==, :"$2", []}], [{{:"$1", :"$3", :"$4"}}]} + ] + ) + + case unused_conns do + [] -> + :telemetry.execute( + [:pleroma, :connection_pool, :reclaim, :stop], + %{reclaimed_count: 0}, + %{ + max_connections: max_connections + } + ) + + {:stop, :no_unused_conns, nil} + + unused_conns -> + reclaimed = + unused_conns + |> Enum.sort(fn {_pid1, crf1, last_reference1}, {_pid2, crf2, last_reference2} -> + crf1 <= crf2 and last_reference1 <= last_reference2 + end) + |> Enum.take(reclaim_max) + + reclaimed + |> Enum.each(fn {pid, _, _} -> + DynamicSupervisor.terminate_child(Pleroma.Gun.ConnectionPool.WorkerSupervisor, pid) + end) + + :telemetry.execute( + [:pleroma, :connection_pool, :reclaim, :stop], + %{reclaimed_count: Enum.count(reclaimed)}, + %{max_connections: max_connections} + ) + + {:stop, :normal, nil} + end + end +end diff --git a/lib/pleroma/gun/connection_pool/worker.ex b/lib/pleroma/gun/connection_pool/worker.ex new file mode 100644 index 000000000..f33447cb6 --- /dev/null +++ b/lib/pleroma/gun/connection_pool/worker.ex @@ -0,0 +1,127 @@ +defmodule Pleroma.Gun.ConnectionPool.Worker do + alias Pleroma.Gun + use GenServer, restart: :temporary + + @registry Pleroma.Gun.ConnectionPool + + def start_link([key | _] = opts) do + GenServer.start_link(__MODULE__, opts, name: {:via, Registry, {@registry, key}}) + end + + @impl true + def init([_key, _uri, _opts, _client_pid] = opts) do + {:ok, nil, {:continue, {:connect, opts}}} + end + + @impl true + def handle_continue({:connect, [key, uri, opts, client_pid]}, _) do + with {:ok, conn_pid} <- Gun.Conn.open(uri, opts), + Process.link(conn_pid) do + time = :erlang.monotonic_time(:millisecond) + + {_, _} = + Registry.update_value(@registry, key, fn _ -> + {conn_pid, [client_pid], 1, time} + end) + + send(client_pid, {:conn_pid, conn_pid}) + + {:noreply, + %{key: key, timer: nil, client_monitors: %{client_pid => Process.monitor(client_pid)}}, + :hibernate} + else + err -> + {:stop, {:shutdown, err}, nil} + end + end + + @impl true + def handle_cast({:add_client, client_pid, send_pid_back}, %{key: key} = state) do + time = :erlang.monotonic_time(:millisecond) + + {{conn_pid, _, _, _}, _} = + Registry.update_value(@registry, key, fn {conn_pid, used_by, crf, last_reference} -> + {conn_pid, [client_pid | used_by], crf(time - last_reference, crf), time} + end) + + if send_pid_back, do: send(client_pid, {:conn_pid, conn_pid}) + + state = + if state.timer != nil do + Process.cancel_timer(state[:timer]) + %{state | timer: nil} + else + state + end + + ref = Process.monitor(client_pid) + + state = put_in(state.client_monitors[client_pid], ref) + {:noreply, state, :hibernate} + end + + @impl true + def handle_cast({:remove_client, client_pid}, %{key: key} = state) do + {{_conn_pid, used_by, _crf, _last_reference}, _} = + Registry.update_value(@registry, key, fn {conn_pid, used_by, crf, last_reference} -> + {conn_pid, List.delete(used_by, client_pid), crf, last_reference} + end) + + {ref, state} = pop_in(state.client_monitors[client_pid]) + Process.demonitor(ref) + + timer = + if used_by == [] do + max_idle = Pleroma.Config.get([:connections_pool, :max_idle_time], 30_000) + Process.send_after(self(), :idle_close, max_idle) + else + nil + end + + {:noreply, %{state | timer: timer}, :hibernate} + end + + @impl true + def handle_info(:idle_close, state) do + # Gun monitors the owner process, and will close the connection automatically + # when it's terminated + {:stop, :normal, state} + end + + # Gracefully shutdown if the connection got closed without any streams left + @impl true + def handle_info({:gun_down, _pid, _protocol, _reason, []}, state) do + {:stop, :normal, state} + end + + # Otherwise, shutdown with an error + @impl true + def handle_info({:gun_down, _pid, _protocol, _reason, _killed_streams} = down_message, state) do + {:stop, {:error, down_message}, state} + end + + @impl true + def handle_info({:DOWN, _ref, :process, pid, reason}, state) do + # Sometimes the client is dead before we demonitor it in :remove_client, so the message + # arrives anyway + + case state.client_monitors[pid] do + nil -> + {:noreply, state, :hibernate} + + _ref -> + :telemetry.execute( + [:pleroma, :connection_pool, :client_death], + %{client_pid: pid, reason: reason}, + %{key: state.key} + ) + + handle_cast({:remove_client, pid}, state) + end + end + + # LRFU policy: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.1478 + defp crf(time_delta, prev_crf) do + 1 + :math.pow(0.5, 0.0001 * time_delta) * prev_crf + end +end diff --git a/lib/pleroma/gun/connection_pool/worker_supervisor.ex b/lib/pleroma/gun/connection_pool/worker_supervisor.ex new file mode 100644 index 000000000..39615c956 --- /dev/null +++ b/lib/pleroma/gun/connection_pool/worker_supervisor.ex @@ -0,0 +1,45 @@ +defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do + @moduledoc "Supervisor for pool workers. Does not do anything except enforce max connection limit" + + use DynamicSupervisor + + def start_link(opts) do + DynamicSupervisor.start_link(__MODULE__, opts, name: __MODULE__) + end + + def init(_opts) do + DynamicSupervisor.init( + strategy: :one_for_one, + max_children: Pleroma.Config.get([:connections_pool, :max_connections]) + ) + end + + def start_worker(opts, retry \\ false) do + case DynamicSupervisor.start_child(__MODULE__, {Pleroma.Gun.ConnectionPool.Worker, opts}) do + {:error, :max_children} -> + if retry or free_pool() == :error do + :telemetry.execute([:pleroma, :connection_pool, :provision_failure], %{opts: opts}) + {:error, :pool_full} + else + start_worker(opts, true) + end + + res -> + res + end + end + + defp free_pool do + wait_for_reclaimer_finish(Pleroma.Gun.ConnectionPool.Reclaimer.start_monitor()) + end + + defp wait_for_reclaimer_finish({pid, mon}) do + receive do + {:DOWN, ^mon, :process, ^pid, :no_unused_conns} -> + :error + + {:DOWN, ^mon, :process, ^pid, :normal} -> + :ok + end + end +end |