From 72cc7b336954a9af8f7cd3adb6d2f70be4d49e6d Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Thu, 13 May 2021 13:02:42 -0500 Subject: Rich media: merge URIs when needed --- lib/pleroma/web/rich_media/parser/card.ex | 15 ++++-- test/pleroma/web/rich_media/parser/card_test.exs | 59 ++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/web/rich_media/parser/card.ex b/lib/pleroma/web/rich_media/parser/card.ex index b29db730b..abae06ab9 100644 --- a/lib/pleroma/web/rich_media/parser/card.ex +++ b/lib/pleroma/web/rich_media/parser/card.ex @@ -40,8 +40,8 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do html: sanitize_html(oembed["html"]), width: oembed["width"], height: oembed["height"], - image: get_image(oembed) |> proxy(), - embed_url: oembed["url"] |> proxy() + image: get_image(oembed) |> fix_uri(url) |> proxy(), + embed_url: oembed["url"] |> fix_uri(url) |> proxy() } |> validate() end @@ -56,7 +56,7 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do type: "link", provider_name: uri.host, provider_url: "#{uri.scheme}://#{uri.host}", - image: get_image(embed) |> proxy() + image: get_image(embed) |> fix_uri(url) |> proxy() } |> validate() end @@ -112,6 +112,15 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do defp stringify_keys(%{} = map), do: Map.new(map, fn {k, v} -> {Atom.to_string(k), v} end) + def fix_uri("http://" <> _ = uri, _base_uri), do: uri + def fix_uri("https://" <> _ = uri, _base_uri), do: uri + def fix_uri("/" <> _ = uri, base_uri), do: URI.merge(base_uri, uri) |> URI.to_string() + + def fix_uri(uri, base_uri) when is_binary(uri), + do: URI.merge(base_uri, "/#{uri}") |> URI.to_string() + + def fix_uri(_uri, _base_uri), do: nil + defp proxy(url) when is_binary(url), do: Pleroma.Web.MediaProxy.url(url) defp proxy(_), do: nil diff --git a/test/pleroma/web/rich_media/parser/card_test.exs b/test/pleroma/web/rich_media/parser/card_test.exs index e09dfa6db..d85491f2e 100644 --- a/test/pleroma/web/rich_media/parser/card_test.exs +++ b/test/pleroma/web/rich_media/parser/card_test.exs @@ -32,6 +32,19 @@ defmodule Pleroma.Web.RichMedia.Parser.CardTest do assert Card.parse(embed) == {:ok, expected} end + + test "converts URL paths into absolute URLs" do + embed = %Embed{ + url: "https://spam.com/luigi", + title: "Watch Luigi not doing anything", + meta: %{ + "og:image" => "/uploads/weegee.jpeg" + } + } + + {:ok, card} = Card.parse(embed) + assert card.image == "https://spam.com/uploads/weegee.jpeg" + end end describe "validate/1" do @@ -44,5 +57,51 @@ defmodule Pleroma.Web.RichMedia.Parser.CardTest do assert {:ok, ^card} = Card.validate(card) end + + test "errors for video embeds without html" do + embed = %Embed{ + url: "https://spam.com/xyz", + oembed: %{ + "type" => "video", + "title" => "Yeeting soda cans" + } + } + + assert {:error, {:invalid_metadata, _}} = Card.validate(embed) + end + end + + describe "fix_uri/2" do + setup do: %{base_uri: "https://benis.xyz/hello/fam"} + + test "two full URLs", %{base_uri: base_uri} do + uri = "https://benis.xyz/images/pic.jpeg" + assert Card.fix_uri(uri, base_uri) == uri + end + + test "URI with leading slash", %{base_uri: base_uri} do + uri = "/images/pic.jpeg" + expected = "https://benis.xyz/images/pic.jpeg" + assert Card.fix_uri(uri, base_uri) == expected + end + + test "URI without leading slash", %{base_uri: base_uri} do + uri = "images/pic.jpeg" + expected = "https://benis.xyz/images/pic.jpeg" + assert Card.fix_uri(uri, base_uri) == expected + end + + test "nil URI", %{base_uri: base_uri} do + assert Card.fix_uri(nil, base_uri) == nil + end + + # https://github.com/elixir-lang/elixir/issues/10771 + test "Elixir #10771", _ do + uri = + "https://images.macrumors.com/t/4riJyi1XC906qyJ41nAfOgpvo1I=/1600x/https://images.macrumors.com/article-new/2020/09/spatialaudiofeature.jpg" + + base_uri = "https://www.macrumors.com/guide/apps-support-apples-spatial-audio-feature/" + assert Card.fix_uri(uri, base_uri) == uri + end end end -- cgit v1.2.3 From 92eb248581dbbdbac68a240c401f5357b45baf27 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Thu, 13 May 2021 14:03:53 -0500 Subject: Parser: don't be so strict about validating Embeds (leave it up to the Card) --- lib/pleroma/web/rich_media/parser.ex | 7 ++--- test/pleroma/web/rich_media/parser_test.exs | 40 ++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 66c90682c..0532e51ad 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -4,7 +4,6 @@ defmodule Pleroma.Web.RichMedia.Parser do require Logger - alias Pleroma.Web.RichMedia.Parser.Card alias Pleroma.Web.RichMedia.Parser.Embed @cachex Pleroma.Config.get([:cachex, :provider], Cachex) @@ -145,8 +144,7 @@ defmodule Pleroma.Web.RichMedia.Parser do {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url), {:ok, data} <- Jason.decode(json), - embed <- %Embed{url: url, oembed: data}, - {:ok, %Card{}} <- Card.validate(embed) do + embed <- %Embed{url: url, oembed: data} do {:ok, embed} else {:error, error} -> {:error, error} @@ -157,8 +155,7 @@ defmodule Pleroma.Web.RichMedia.Parser do defp fetch_document(url) do with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url), {:ok, html} <- Floki.parse_document(html), - %Embed{} = embed <- parse_embed(html, url), - {:ok, %Card{}} <- Card.validate(embed) do + %Embed{} = embed <- parse_embed(html, url) do {:ok, embed} else {:error, error} -> {:error, error} diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index f7d010219..8506953bb 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -83,8 +83,15 @@ defmodule Pleroma.Web.RichMedia.ParserTest do :ok end - test "returns error when no metadata present" do - assert {:error, _} = Parser.parse("http://example.com/empty") + test "returns empty embed when no metadata present" do + expected = %Embed{ + meta: %{}, + oembed: nil, + title: nil, + url: "http://example.com/empty" + } + + assert Parser.parse("http://example.com/empty") == {:ok, expected} end test "parses ogp" do @@ -166,8 +173,33 @@ defmodule Pleroma.Web.RichMedia.ParserTest do assert Parser.parse(url) == {:ok, expected} end - test "rejects invalid OGP data" do - assert {:error, _} = Parser.parse("http://example.com/malformed") + test "cleans corrupted meta data" do + expected = %Embed{ + meta: %{ + "Keywords" => "Konsument i zakupy", + "ROBOTS" => "NOARCHIVE", + "fb:app_id" => "515714931781741", + "fb:pages" => "288018984602680", + "google-site-verification" => "3P4BE3hLw82QWqtseIE60qQcOtrpMxMnCNkcv62pjTA", + "news_keywords" => "Konsument i zakupy", + "og:image" => + "https://bi.im-g.pl/im/f7/49/17/z24418295FBW,Prace-nad-projektem-chusty-antysmogowej-rozpoczely.jpg", + "og:locale" => "pl_PL", + "og:site_name" => "wyborcza.biz", + "og:type" => "article", + "og:url" => + "http://wyborcza.biz/biznes/7,147743,24417936,pomysl-na-biznes-chusta-ktora-chroni-przed-smogiem.html", + "twitter:card" => "summary_large_image", + "twitter:image" => + "https://bi.im-g.pl/im/f7/49/17/z24418295FBW,Prace-nad-projektem-chusty-antysmogowej-rozpoczely.jpg", + "viewport" => "width=device-width, user-scalable=yes" + }, + oembed: nil, + title: nil, + url: "http://example.com/malformed" + } + + assert Parser.parse("http://example.com/malformed") == {:ok, expected} end test "returns error if getting page was not successful" do -- cgit v1.2.3 From 199747397935610d5c4232ab96c4be841116409a Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Thu, 13 May 2021 14:22:50 -0500 Subject: Rich media: have invalid Rich/Video fall back to Link --- lib/pleroma/web/rich_media/parser/card.ex | 5 +- test/fixtures/rich_media/wordpress.html | 584 +++++++++++++++++++++++ test/fixtures/rich_media/wordpress_embed.json | 1 + test/pleroma/web/rich_media/parser/card_test.exs | 46 +- 4 files changed, 623 insertions(+), 13 deletions(-) create mode 100644 test/fixtures/rich_media/wordpress.html create mode 100644 test/fixtures/rich_media/wordpress_embed.json diff --git a/lib/pleroma/web/rich_media/parser/card.ex b/lib/pleroma/web/rich_media/parser/card.ex index abae06ab9..528837614 100644 --- a/lib/pleroma/web/rich_media/parser/card.ex +++ b/lib/pleroma/web/rich_media/parser/card.ex @@ -115,6 +115,7 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do def fix_uri("http://" <> _ = uri, _base_uri), do: uri def fix_uri("https://" <> _ = uri, _base_uri), do: uri def fix_uri("/" <> _ = uri, base_uri), do: URI.merge(base_uri, uri) |> URI.to_string() + def fix_uri("", _base_uri), do: nil def fix_uri(uri, base_uri) when is_binary(uri), do: URI.merge(base_uri, "/#{uri}") |> URI.to_string() @@ -126,7 +127,9 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do def validate(%Card{type: type, html: html} = card) when type in ["video", "rich"] and (is_binary(html) == false or html == "") do - {:error, {:invalid_metadata, card}} + card + |> Map.put(:type, "link") + |> validate() end def validate(%Card{type: type, title: title} = card) diff --git a/test/fixtures/rich_media/wordpress.html b/test/fixtures/rich_media/wordpress.html new file mode 100644 index 000000000..68d28e7e6 --- /dev/null +++ b/test/fixtures/rich_media/wordpress.html @@ -0,0 +1,584 @@ + + + + + + + + + + + + Mexican Drug Cartel Animation — by I Shot Him + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+
+ +
+
+ +
+
+
+
+
+
+
+ +
+
+
+

Look what you did ya li'l turd!

+
+ Everyone knows Internet Explorer turns the internet into a steaming hot pile of smelly poo poo. +
+ +
+
+
+
+
+ +
+
+
+
+
+ +
+ +
+
+ +
+ +
+

Mexican Drug Cartels

+

We collaborated with the folks at Visual.ly on this informative animation about the violence from drug cartels happening right across our border. We researched, wrote, illustrated, and animated this piece to inform people about the connections of our drug and gun laws to the death of innocents in Mexico.

+ +
+
+
+
+
+

Sharing is Caring

+ push one of these totally not dorky buttons and we will owe you one. + +
+
+
+
+ +
+ +
+ +

+

Animation

+

Mexican Cartel Animation - by I Shot Him

+

Title Screen

+

Cartel_storyboard

+

Beheadings

+

Cartel_storyboard

+

Cartel Member

+

Cartel_storyboard

+

Death Toll

+

Cartel_storyboard

+

Mass Grave

+

Cartel_storyboard

+

Death toll of police officers

+

Cartel_storyboard

+

Molotov cocktail used to burn down houses

+

Cartel_storyboard

+

We do a bunch of drugs

+

Cartel_storyboard

+

Obama needs to step up and change gun laws and drug laws

+
+ +
+

Sharing is Caring

+ push one of these totally not dorky buttons and we will owe you one. + +
+ + + + +
+ + + +
+ + +
+
+ +
+
+
+
+ + + + +
+ + + + +
+
+
+
+
+
+ +
+
+
+

Internet Explorer?

+
+ Unfortunately IE doesn't do a very good job of displaying the internet. +
+ +
+
+ + + + + + + + + + + + + + + diff --git a/test/fixtures/rich_media/wordpress_embed.json b/test/fixtures/rich_media/wordpress_embed.json new file mode 100644 index 000000000..51cd6c3c1 --- /dev/null +++ b/test/fixtures/rich_media/wordpress_embed.json @@ -0,0 +1 @@ +{"version":"1.0","provider_name":"I Shot Him","provider_url":"https:\/\/ishothim.com","author_name":"Michael Jeter","author_url":"https:\/\/ishothim.com\/author\/mike\/","title":"Mexican Drug Cartels","type":"rich","width":600,"height":338,"html":"
Mexican Drug Cartels<\/a><\/blockquote>\n