diff options
Diffstat (limited to 'lib/pleroma/web/rich_media')
-rw-r--r-- | lib/pleroma/web/rich_media/helpers.ex | 67 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser.ex | 71 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser/ttl.ex | 7 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser/ttl/aws_signed_url.ex (renamed from lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex) | 6 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex | 2 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/o_embed.ex (renamed from lib/pleroma/web/rich_media/parsers/oembed_parser.ex) | 2 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/ogp.ex | 2 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/ttl/ttl.ex | 3 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/twitter_card.ex | 2 |
9 files changed, 117 insertions, 45 deletions
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 752ca9f81..566fc8c8a 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -9,14 +9,15 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser - @rich_media_options [ + @options [ pool: :media, - max_body: 2_000_000 + max_body: 2_000_000, + recv_timeout: 2_000 ] @spec validate_page_url(URI.t() | binary()) :: :ok | :error defp validate_page_url(page_url) when is_binary(page_url) do - validate_tld = Pleroma.Config.get([Pleroma.Formatter, :validate_tld]) + validate_tld = Config.get([Pleroma.Formatter, :validate_tld]) page_url |> Linkify.Parser.url?(validate_tld: validate_tld) @@ -56,7 +57,6 @@ defmodule Pleroma.Web.RichMedia.Helpers do def fetch_data_for_object(object) do with true <- Config.get([:rich_media, :enabled]), - false <- object.data["sensitive"] || false, {:ok, page_url} <- HTML.extract_first_external_url_from_object(object), :ok <- validate_page_url(page_url), @@ -69,7 +69,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do with true <- Config.get([:rich_media, :enabled]), - %Object{} = object <- Object.normalize(activity) do + %Object{} = object <- Object.normalize(activity, fetch: false) do fetch_data_for_object(object) else _ -> %{} @@ -78,24 +78,53 @@ defmodule Pleroma.Web.RichMedia.Helpers do def fetch_data_for_activity(_), do: %{} - def perform(:fetch, %Activity{} = activity) do - fetch_data_for_activity(activity) - :ok - end - def rich_media_get(url) do headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}] - options = - if Application.get_env(:tesla, :adapter) == Tesla.Adapter.Hackney do - Keyword.merge(@rich_media_options, - recv_timeout: 2_000, - with_body: true - ) - else - @rich_media_options + head_check = + case Pleroma.HTTP.head(url, headers, @options) do + # If the HEAD request didn't reach the server for whatever reason, + # we assume the GET that comes right after won't either + {:error, _} = e -> + e + + {:ok, %Tesla.Env{status: 200, headers: headers}} -> + with :ok <- check_content_type(headers), + :ok <- check_content_length(headers), + do: :ok + + _ -> + :ok end - Pleroma.HTTP.get(url, headers, adapter: options) + with :ok <- head_check, do: Pleroma.HTTP.get(url, headers, @options) + end + + defp check_content_type(headers) do + case List.keyfind(headers, "content-type", 0) do + {_, content_type} -> + case Plug.Conn.Utils.media_type(content_type) do + {:ok, "text", "html", _} -> :ok + _ -> {:error, {:content_type, content_type}} + end + + _ -> + :ok + end + end + + @max_body @options[:max_body] + defp check_content_length(headers) do + case List.keyfind(headers, "content-length", 0) do + {_, maybe_content_length} -> + case Integer.parse(maybe_content_length) do + {content_length, ""} when content_length <= @max_body -> :ok + {_, ""} -> {:error, :body_too_large} + _ -> :ok + end + + _ -> + :ok + end end end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 5727fda18..d6b54943b 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -1,10 +1,12 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/> +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parser do require Logger + @cachex Pleroma.Config.get([:cachex, :provider], Cachex) + defp parsers do Pleroma.Config.get([:rich_media, :parsers]) end @@ -20,28 +22,61 @@ defmodule Pleroma.Web.RichMedia.Parser do with {:ok, data} <- get_cached_or_parse(url), {:ok, _} <- set_ttl_based_on_image(data, url) do {:ok, data} - else - {:error, {:invalid_metadata, data}} = e -> - Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) - e - - error -> - Logger.error(fn -> "Rich media error for #{url}: #{inspect(error)}" end) - error end end defp get_cached_or_parse(url) do - case Cachex.fetch!(:rich_media_cache, url, fn _ -> {:commit, parse_url(url)} end) do - {:ok, _data} = res -> - res - - {:error, _} = e -> - ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) - Cachex.expire(:rich_media_cache, url, ttl) - e + case @cachex.fetch(:rich_media_cache, url, fn -> + case parse_url(url) do + {:ok, _} = res -> + {:commit, res} + + {:error, reason} = e -> + # Unfortunately we have to log errors here, instead of doing that + # along with ttl setting at the bottom. Otherwise we can get log spam + # if more than one process was waiting for the rich media card + # while it was generated. Ideally we would set ttl here as well, + # so we don't override it number_of_waiters_on_generation + # times, but one, obviously, can't set ttl for not-yet-created entry + # and Cachex doesn't support returning ttl from the fetch callback. + log_error(url, reason) + {:commit, e} + end + end) do + {action, res} when action in [:commit, :ok] -> + case res do + {:ok, _data} = res -> + res + + {:error, reason} = e -> + if action == :commit, do: set_error_ttl(url, reason) + e + end + + {:error, e} -> + {:error, {:cachex_error, e}} end end + + defp set_error_ttl(_url, :body_too_large), do: :ok + defp set_error_ttl(_url, {:content_type, _}), do: :ok + + # The TTL is not set for the errors above, since they are unlikely to change + # with time + + defp set_error_ttl(url, _reason) do + ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) + @cachex.expire(:rich_media_cache, url, ttl) + :ok + end + + defp log_error(url, {:invalid_metadata, data}) do + Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) + end + + defp log_error(url, reason) do + Logger.warn(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) + end end @doc """ @@ -73,7 +108,7 @@ defmodule Pleroma.Web.RichMedia.Parser do {:ok, ttl} when is_number(ttl) -> ttl = ttl * 1000 - case Cachex.expire_at(:rich_media_cache, url, ttl) do + case @cachex.expire_at(:rich_media_cache, url, ttl) do {:ok, true} -> {:ok, ttl} {:ok, false} -> {:error, :no_key} end diff --git a/lib/pleroma/web/rich_media/parser/ttl.ex b/lib/pleroma/web/rich_media/parser/ttl.ex new file mode 100644 index 000000000..0b7f14fb2 --- /dev/null +++ b/lib/pleroma/web/rich_media/parser/ttl.ex @@ -0,0 +1,7 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.Parser.TTL do + @callback ttl(Map.t(), String.t()) :: Integer.t() | nil +end diff --git a/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex b/lib/pleroma/web/rich_media/parser/ttl/aws_signed_url.ex index c5aaea2d4..c7eb267f3 100644 --- a/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex +++ b/lib/pleroma/web/rich_media/parser/ttl/aws_signed_url.ex @@ -1,7 +1,11 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do @behaviour Pleroma.Web.RichMedia.Parser.TTL - @impl Pleroma.Web.RichMedia.Parser.TTL + @impl true def ttl(data, _url) do image = Map.get(data, :image) diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex index 3d577e254..31c3d1e33 100644 --- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -1,5 +1,5 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/> +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex index 1fe6729c3..09eabec56 100644 --- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex @@ -1,5 +1,5 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/> +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex index b3b3b059c..d0edf1c88 100644 --- a/lib/pleroma/web/rich_media/parsers/ogp.ex +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -1,5 +1,5 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/> +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.OGP do diff --git a/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex b/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex deleted file mode 100644 index 6b3ec6d30..000000000 --- a/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule Pleroma.Web.RichMedia.Parser.TTL do - @callback ttl(Map.t(), String.t()) :: {:ok, Integer.t()} | {:error, String.t()} -end diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex index 4a04865d2..0adf84159 100644 --- a/lib/pleroma/web/rich_media/parsers/twitter_card.ex +++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex @@ -1,5 +1,5 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/> +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do |