diff options
-rw-r--r-- | lib/pleroma/web/mastodon_api/views/status_view.ex | 9 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/helpers.ex | 21 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser.ex | 43 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser/card.ex | 107 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser/embed.ex | 10 |
5 files changed, 109 insertions, 81 deletions
diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 5eb09f0a1..ecf81b65d 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -20,6 +20,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.MediaProxy alias Pleroma.Web.PleromaAPI.EmojiReactionController + alias Pleroma.Web.RichMedia.Parser.Card + alias Pleroma.Web.RichMedia.Parser.Embed import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2] @@ -367,10 +369,13 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do nil end - def render("card.json", %{rich_media: rich_media, page_url: _page_url}) do - rich_media + def render("card.json", %Embed{url: _, meta: _} = embed) do + embed + |> Card.parse() + |> Card.to_map() end + def render("card.json", %Card{} = card), do: Card.to_map(card) def render("card.json", _), do: nil def render("attachment.json", %{attachment: attachment}) do diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 249730aea..7021c70ff 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -8,7 +8,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.HTML alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser - alias Pleroma.Web.RichMedia.Parser.Card + alias Pleroma.Web.RichMedia.Parser.Embed @options [ pool: :media, @@ -58,26 +58,15 @@ defmodule Pleroma.Web.RichMedia.Helpers do |> hd end - defp strip_card(%Card{} = card) do - card - |> Map.from_struct() - |> Map.new(fn {k, v} -> {Atom.to_string(k), v} end) - end - - defp strip_card(%{} = card) do - Map.new(card, fn {k, v} -> {Atom.to_string(k), v} end) - end - def fetch_data_for_object(object) do with true <- Config.get([:rich_media, :enabled]), {:ok, page_url} <- HTML.extract_first_external_url_from_object(object), :ok <- validate_page_url(page_url), - {:ok, rich_media} <- Parser.parse(page_url), - rich_media <- strip_card(rich_media) do - %{page_url: page_url, rich_media: rich_media} + {:ok, %Embed{} = embed} <- Parser.parse(page_url) do + embed else - _ -> %{} + _ -> nil end end @@ -86,7 +75,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do %Object{} = object <- Object.normalize(activity, fetch: false) do fetch_data_for_object(object) else - _ -> %{} + _ -> nil end end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index d628513be..0eaf3274e 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -5,6 +5,7 @@ defmodule Pleroma.Web.RichMedia.Parser do require Logger alias Pleroma.Web.RichMedia.Parser.Card + alias Pleroma.Web.RichMedia.Parser.Embed @cachex Pleroma.Config.get([:cachex, :provider], Cachex) @@ -133,7 +134,7 @@ defmodule Pleroma.Web.RichMedia.Parser do def parse_url(url) do case maybe_fetch_oembed(url) do - {:ok, %Card{} = card} -> {:ok, card} + {:ok, %Embed{} = embed} -> {:ok, embed} _ -> fetch_document(url) end end @@ -143,8 +144,9 @@ defmodule Pleroma.Web.RichMedia.Parser do {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url), {:ok, data} <- Jason.decode(json), - %Card{} = card <- Card.from_oembed(data, url) do - {:ok, card} + embed <- %Embed{url: url, oembed: data}, + {:ok, %Card{}} <- Card.validate(embed) do + {:ok, embed} else {:error, error} -> {:error, error} error -> {:error, error} @@ -153,36 +155,19 @@ defmodule Pleroma.Web.RichMedia.Parser do defp fetch_document(url) do with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url), - {:ok, html} <- Floki.parse_document(html) do - html - |> maybe_parse() - |> Map.put("url", url) - |> clean_parsed_data() - |> Card.from_discovery(url) - |> check_card() + {:ok, html} <- Floki.parse_document(html), + %Embed{} = embed <- parse_embed(html, url), + {:ok, %Card{}} <- Card.validate(embed) do + {:ok, embed} + else + {:error, error} -> {:error, error} + error -> {:error, error} end end - defp maybe_parse(html) do - Enum.reduce(parsers(), %{}, fn parser, acc -> + defp parse_embed(html, url) do + Enum.reduce(parsers(), %Embed{url: url}, fn parser, acc -> parser.parse(html, acc) end) end - - defp check_card(%Card{title: title} = card) - when is_binary(title) and title != "" do - {:ok, card} - end - - defp check_card(card) do - {:error, {:invalid_metadata, card}} - end - - defp clean_parsed_data(data) do - data - |> Enum.reject(fn {key, val} -> - not match?({:ok, _}, Jason.encode(%{key => val})) - end) - |> Map.new() - end end diff --git a/lib/pleroma/web/rich_media/parser/card.ex b/lib/pleroma/web/rich_media/parser/card.ex index da0acc789..399b41195 100644 --- a/lib/pleroma/web/rich_media/parser/card.ex +++ b/lib/pleroma/web/rich_media/parser/card.ex @@ -3,6 +3,9 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parser.Card do + alias Pleroma.Web.RichMedia.Parser.Card + alias Pleroma.Web.RichMedia.Parser.Embed + @types ["link", "photo", "video", "rich"] # https://docs.joinmastodon.org/entities/card/ @@ -21,12 +24,13 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do embed_url: "", blurhash: nil - def from_oembed(%{"type" => type, "title" => title} = oembed, url) when type in @types do - %__MODULE__{ + def parse(%{url: url, oembed: %{"type" => type, "title" => title} = oembed} = embed) + when type in @types do + %Card{ url: url, title: title, - description: "", - type: type, + description: get_description(embed), + type: oembed["type"], author_name: oembed["author_name"], author_url: oembed["author_url"], provider_name: oembed["provider_name"], @@ -39,39 +43,74 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do } end - def from_oembed(_oembed, _url), do: nil - - def from_discovery(%{"type" => "link"} = rich_media, page_url) do - page_url_data = URI.parse(page_url) - - page_url_data = - if is_binary(rich_media["url"]) do - URI.merge(page_url_data, URI.parse(rich_media["url"])) - else - page_url_data - end - - page_url = page_url_data |> to_string - - image_url = - if is_binary(rich_media["image"]) do - URI.merge(page_url_data, URI.parse(rich_media["image"])) - |> to_string - end - - %__MODULE__{ - type: "link", - provider_name: page_url_data.host, - provider_url: page_url_data.scheme <> "://" <> page_url_data.host, - url: page_url, - image: image_url |> proxy(), - title: rich_media["title"] || "", - description: rich_media["description"] || "" - } + def parse(%{url: url} = embed) do + title = get_title(embed) + + if is_binary(title) do + %Card{ + url: url, + title: title, + description: get_description(embed), + type: "link", + image: get_image(embed) |> proxy() + } + else + nil + end + end + + def parse(_), do: nil + + defp get_title(embed) do + case embed do + %{meta: %{"twitter:title" => title}} when is_binary(title) and title != "" -> title + %{meta: %{"og:title" => title}} when is_binary(title) and title != "" -> title + %{title: title} when is_binary(title) and title != "" -> title + _ -> "" + end + end + + defp get_description(%{meta: meta}) do + case meta do + %{"twitter:description" => desc} when is_binary(desc) and desc != "" -> desc + %{"og:description" => desc} when is_binary(desc) and desc != "" -> desc + %{"description" => desc} when is_binary(desc) and desc != "" -> desc + _ -> "" + end + end + + defp get_image(%{meta: meta}) do + case meta do + %{"twitter:image" => image} when is_binary(image) and image != "" -> image + %{"og:image" => image} when is_binary(image) and image != "" -> image + _ -> "" + end + end + + def to_map(%Card{} = card) do + card + |> Map.from_struct() + |> stringify_keys() end - def from_discovery(rich_media, url), do: from_oembed(rich_media, url) + def to_map(%{} = card), do: stringify_keys(card) + + defp stringify_keys(%{} = map), do: Map.new(map, fn {k, v} -> {Atom.to_string(k), v} end) defp proxy(url) when is_binary(url), do: Pleroma.Web.MediaProxy.url(url) defp proxy(_), do: nil + + def validate(%Card{type: type, title: title} = card) + when type in @types and is_binary(title) and title != "" do + {:ok, card} + end + + def validate(%Embed{} = embed) do + case Card.parse(embed) do + %Card{} = card -> validate(card) + card -> {:error, {:invalid_metadata, card}} + end + end + + def validate(card), do: {:error, {:invalid_metadata, card}} end diff --git a/lib/pleroma/web/rich_media/parser/embed.ex b/lib/pleroma/web/rich_media/parser/embed.ex new file mode 100644 index 000000000..509e557e6 --- /dev/null +++ b/lib/pleroma/web/rich_media/parser/embed.ex @@ -0,0 +1,10 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.Parser.Embed do + @moduledoc """ + Represents embedded content, including scraped markup and OEmbed. + """ + defstruct url: nil, meta: nil, oembed: nil +end |