4 files changed, 468 insertions, 0 deletions
diff --git a/test/pleroma/web/rich_media/helpers_test.exs b/test/pleroma/web/rich_media/helpers_test.exs
new file mode 100644
index 000000000..689854fb6
--- /dev/null
+++ b/test/pleroma/web/rich_media/helpers_test.exs
@@ -0,0 +1,84 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.HelpersTest do
+  use Pleroma.DataCase
+
+  alias Pleroma.Web.CommonAPI
+  alias Pleroma.Web.RichMedia.Helpers
+
+  import Pleroma.Factory
+  import Tesla.Mock
+
+  setup do
+    # Tesla.Mock hands each request env of the test to HttpRequestMock.request/1.
+    mock(fn request -> apply(HttpRequestMock, :request, [request]) end)
+    :ok
+  end
+
+  setup do: clear_config([:rich_media, :enabled])
+
+  test "refuses to crawl incomplete URLs" do
+    # The link target below has no scheme, i.e. it is not a complete URL.
+    params = %{
+      status: "[test](example.com/ogp)",
+      content_type: "text/markdown"
+    }
+
+    {:ok, post} = CommonAPI.post(insert(:user), params)
+
+    clear_config([:rich_media, :enabled], true)
+
+    assert %{} == Helpers.fetch_data_for_activity(post)
+  end
+
+  test "refuses to crawl malformed URLs" do
+    # The link target below is not a well-formed URL: no scheme, stray `[]` after the host.
+    params = %{
+      status: "[test](example.com[]/ogp)",
+      content_type: "text/markdown"
+    }
+
+    {:ok, post} = CommonAPI.post(insert(:user), params)
+
+    clear_config([:rich_media, :enabled], true)
+
+    assert %{} == Helpers.fetch_data_for_activity(post)
+  end
+
+  test "crawls valid, complete URLs" do
+    # A complete https URL; the result is expected to carry it as :page_url.
+    params = %{
+      status: "[test](https://example.com/ogp)",
+      content_type: "text/markdown"
+    }
+
+    {:ok, post} = CommonAPI.post(insert(:user), params)
+
+    clear_config([:rich_media, :enabled], true)
+
+    result = Helpers.fetch_data_for_activity(post)
+    assert %{page_url: "https://example.com/ogp", rich_media: _} = result
+  end
+
+  test "refuses to crawl URLs of private network from posts" do
+    user = insert(:user)
+    {:ok, activity} =
+      CommonAPI.post(user, %{status: "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"})
+    {:ok, activity2} = CommonAPI.post(user, %{status: "https://10.111.10.1/notice/9kCP7V"})
+    {:ok, activity3} = CommonAPI.post(user, %{status: "https://172.16.32.40/notice/9kCP7V"})
+    {:ok, activity4} = CommonAPI.post(user, %{status: "https://192.168.10.40/notice/9kCP7V"})
+    {:ok, activity5} = CommonAPI.post(user, %{status: "https://pleroma.local/notice/9kCP7V"})
+
+    clear_config([:rich_media, :enabled], true)
+
+    # `==`, not `=`: used as a pattern, `%{}` matches any map, so a match would
+    # also pass if one of these URLs had actually been crawled.
+    assert %{} == Helpers.fetch_data_for_activity(activity)
+    assert %{} == Helpers.fetch_data_for_activity(activity2)
+    assert %{} == Helpers.fetch_data_for_activity(activity3)
+    assert %{} == Helpers.fetch_data_for_activity(activity4)
+    assert %{} == Helpers.fetch_data_for_activity(activity5)
+  end
+end
diff --git a/test/pleroma/web/rich_media/parser/ttl/aws_signed_url_test.exs b/test/pleroma/web/rich_media/parser/ttl/aws_signed_url_test.exs
new file mode 100644
index 000000000..0c8203417
--- /dev/null
+++ b/test/pleroma/web/rich_media/parser/ttl/aws_signed_url_test.exs
@@ -0,0 +1,81 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrlTest do
+  # Relies on Cachex, needs to be synchronous
+  use Pleroma.DataCase
+
+  test "s3 signed url is parsed correct for expiration time" do
+    page_url = "https://pleroma.social/amz"
+    # Lifetime of the signature, in seconds.
+    lifetime = 30
+
+    now = DateTime.truncate(Timex.now(), :second)
+    {:ok, signed_at} = Timex.format(now, "{ISO:Basic:Z}")
+
+    metadata = construct_metadata(signed_at, lifetime, page_url)
+
+    # The expected expiry is the signing time (round-tripped through the same
+    # format string) as a Unix timestamp, plus the lifetime.
+    signed_at_unix = signed_at |> Timex.parse!("{ISO:Basic:Z}") |> Timex.to_unix()
+    expected = signed_at_unix + lifetime
+
+    actual = Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.ttl(metadata, page_url)
+    assert {:ok, expected} == actual
+  end
+
+  test "s3 signed url is parsed and correct ttl is set for rich media" do
+    page_url = "https://pleroma.social/amz"
+    # Lifetime of the signature, in seconds.
+    lifetime = 30
+
+    now = DateTime.truncate(Timex.now(), :second)
+    {:ok, signed_at} = Timex.format(now, "{ISO:Basic:Z}")
+
+    metadata = construct_metadata(signed_at, lifetime, page_url)
+
+    # A Twitter card whose image is the signed URL from the metadata.
+    body = """
+    <meta name="twitter:card" content="Pleroma" />
+    <meta name="twitter:site" content="Pleroma" />
+    <meta name="twitter:title" content="Pleroma" />
+    <meta name="twitter:description" content="Pleroma" />
+    <meta name="twitter:image" content="#{metadata.image}" />
+    """
+
+    # A GET for the page URL is answered with that card.
+    Tesla.Mock.mock(fn
+      %{method: :get, url: ^page_url} ->
+        %Tesla.Env{status: 200, body: body}
+    end)
+
+    # Seed the cache entry first; set_ttl_based_on_image/2 is then expected to
+    # put an expiry on it.
+    Cachex.put(:rich_media_cache, page_url, metadata)
+
+    Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image(metadata, page_url)
+
+    {:ok, remaining_ms} = Cachex.ttl(:rich_media_cache, page_url)
+
+    # The remaining TTL is compared in milliseconds, hence `* 1000`. Writing and
+    # reading the cache takes time, so 2 seconds of drift are tolerated to keep
+    # the test from being flaky.
+    expected_ms = lifetime * 1000
+    assert_in_delta(expected_ms, remaining_ms, 2000)
+  end
+
+  # Returns an S3-style signed image URL whose X-Amz-Date and X-Amz-Expires query
+  # parameters are `timestamp` and `valid_till`; everything else is fixed.
+  defp construct_s3_url(timestamp, valid_till) do
+    "https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{timestamp}&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host"
+  end
+
+  # Returns rich media metadata for `url` with a freshly built signed URL as its
+  # :image; site, title and description are all "Pleroma".
+  defp construct_metadata(timestamp, valid_till, url) do
+    signed_image = construct_s3_url(timestamp, valid_till)
+
+    %{image: signed_image, site: "Pleroma", title: "Pleroma", description: "Pleroma", url: url}
+  end
+end
diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs
new file mode 100644
index 000000000..2f363b012
--- /dev/null
+++ b/test/pleroma/web/rich_media/parser_test.exs
@@ -0,0 +1,176 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.ParserTest do
+  use ExUnit.Case, async: true
+
+  alias Pleroma.Web.RichMedia.Parser
+  # Canned Tesla responses for the URLs requested by the tests in this module.
+  setup do
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: "http://example.com/ogp"
+      } ->
+        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
+
+      %{
+        method: :get,
+        url: "http://example.com/non-ogp"
+      } ->
+        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}
+
+      %{
+        method: :get,
+        url: "http://example.com/ogp-missing-title"
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
+        }
+
+      %{
+        method: :get,
+        url: "http://example.com/twitter-card"
+      } ->
+        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
+
+      %{
+        method: :get,
+        url: "http://example.com/oembed"
+      } ->
+        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}
+
+      %{
+        method: :get,
+        url: "http://example.com/oembed.json"
+      } ->
+        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}
+      # 200 response with a plain-text body and no metadata markup.
+      %{method: :get, url: "http://example.com/empty"} ->
+        %Tesla.Env{status: 200, body: "hello"}
+
+      %{method: :get, url: "http://example.com/malformed"} ->
+        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}
+      # Returns an error tuple instead of a response env.
+      %{method: :get, url: "http://example.com/error"} ->
+        {:error, :overload}
+      # HEAD requests: status and headers only (content-length and content-type).
+      %{
+        method: :head,
+        url: "http://example.com/huge-page"
+      } ->
+        %Tesla.Env{
+          status: 200,
+          headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
+        }
+
+      %{
+        method: :head,
+        url: "http://example.com/pdf-file"
+      } ->
+        %Tesla.Env{
+          status: 200,
+          headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
+        }
+      # Any other HEAD request gets an empty 404.
+      %{method: :head} ->
+        %Tesla.Env{status: 404, body: "", headers: []}
+    end)
+
+    :ok
+  end
+
+  test "returns error when no metadata present" do
+    assert {:error, _reason} = Parser.parse("http://example.com/empty")
+  end
+
+  test "doesn't just add a title" do
+    assert {:error, {:invalid_metadata, _details}} = Parser.parse("http://example.com/non-ogp")
+  end
+
+  test "parses ogp" do
+    expected = %{
+      "image" => "http://ia.media-imdb.com/images/rock.jpg",
+      "title" => "The Rock",
+      "description" =>
+        "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
+      "type" => "video.movie",
+      "url" => "http://example.com/ogp"
+    }
+
+    assert Parser.parse("http://example.com/ogp") == {:ok, expected}
+  end
+
+  test "falls back to <title> when ogp:title is missing" do
+    expected = %{
+      "image" => "http://ia.media-imdb.com/images/rock.jpg",
+      "title" => "The Rock (1996)",
+      "description" =>
+        "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
+      "type" => "video.movie",
+      "url" => "http://example.com/ogp-missing-title"
+    }
+
+    assert Parser.parse("http://example.com/ogp-missing-title") == {:ok, expected}
+  end
+
+  test "parses twitter card" do
+    expected = %{
+      "card" => "summary",
+      "site" => "@flickr",
+      "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
+      "title" => "Small Island Developing States Photo Submission",
+      "description" => "View the album on Flickr.",
+      "url" => "http://example.com/twitter-card"
+    }
+
+    assert Parser.parse("http://example.com/twitter-card") == {:ok, expected}
+  end
+
+  test "parses OEmbed" do
+    assert Parser.parse("http://example.com/oembed") ==
+             {:ok,
+              %{
+                "author_name" => "‮‭‬bees‬",
+                "author_url" => "https://www.flickr.com/photos/bees/",
+                "cache_age" => 3600,
+                "flickr_type" => "photo",
+                "height" => "768",
+                "html" =>
+                  "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
+                "license" => "All Rights Reserved",
+                "license_id" => 0,
+                "provider_name" => "Flickr",
+                "provider_url" => "https://www.flickr.com/",
+                "thumbnail_height" => 150,
+                "thumbnail_url" =>
+                  "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
+                "thumbnail_width" => 150,
+                "title" => "Bacon Lollys",
+                "type" => "photo",
+                "url" => "http://example.com/oembed",
+                "version" => "1.0",
+                "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
+                "web_page_short_url" => "https://flic.kr/p/4AK2sc",
+                "width" => "1024"
+              }}
+  end
+
+  test "rejects invalid OGP data" do
+    assert {:error, _reason} = Parser.parse("http://example.com/malformed")
+  end
+
+  test "returns error if getting page was not successful" do
+    assert Parser.parse("http://example.com/error") == {:error, :overload}
+  end
+
+  test "does a HEAD request to check if the body is too large" do
+    assert Parser.parse("http://example.com/huge-page") == {:error, :body_too_large}
+  end
+
+  test "does a HEAD request to check if the body is html" do
+    assert {:error, {:content_type, _type}} = Parser.parse("http://example.com/pdf-file")
+  end
+end
diff --git a/test/pleroma/web/rich_media/parsers/twitter_card_test.exs b/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
new file mode 100644
index 000000000..2aacd29a3
--- /dev/null
+++ b/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
@@ -0,0 +1,127 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
+  use ExUnit.Case, async: true
+  alias Pleroma.Web.RichMedia.Parsers.TwitterCard
+
+  test "returns an empty map when html does not contain a twitter card" do
+    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
+  end
+
+  test "parses twitter card with only name attributes" do
+    path = "test/fixtures/nypd-facial-recognition-children-teenagers3.html"
+    document = path |> File.read!() |> Floki.parse_document!()
+
+    # Map equality: every key below must be present, "site" as an explicit nil.
+    expected = %{
+      "app:id:googleplay" => "com.nytimes.android",
+      "app:name:googleplay" => "NYTimes",
+      "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+      "site" => nil,
+      "description" =>
+        "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+      "image" =>
+        "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+      "type" => "article",
+      "url" =>
+        "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+      "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
+    }
+
+    assert TwitterCard.parse(document, %{}) == expected
+  end
+
+  test "parses twitter card with only property attributes" do
+    path = "test/fixtures/nypd-facial-recognition-children-teenagers2.html"
+    document = path |> File.read!() |> Floki.parse_document!()
+
+    # Map equality: exactly these keys are expected, including the empty "image:alt".
+    expected = %{
+      "card" => "summary_large_image",
+      "description" =>
+        "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+      "image" =>
+        "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+      "image:alt" => "",
+      "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+      "url" =>
+        "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+      "type" => "article"
+    }
+
+    assert TwitterCard.parse(document, %{}) == expected
+  end
+
+  test "parses twitter card with name & property attributes" do
+    path = "test/fixtures/nypd-facial-recognition-children-teenagers.html"
+    document = path |> File.read!() |> Floki.parse_document!()
+
+    # Compared with `==`, so "site" must be present as nil and "image:alt" as "".
+    expected = %{
+      "app:id:googleplay" => "com.nytimes.android",
+      "app:name:googleplay" => "NYTimes",
+      "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+      "card" => "summary_large_image",
+      "description" =>
+        "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+      "image" =>
+        "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+      "image:alt" => "",
+      "site" => nil,
+      "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+      "url" =>
+        "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+      "type" => "article"
+    }
+
+    assert TwitterCard.parse(document, %{}) == expected
+  end
+
+  test "respect only first title tag on the page" do
+    image_path =
+      "https://assets.atlasobscura.com/media/W1siZiIsInVwbG9hZHMvYXNzZXRzLzkwYzgyMzI4LThlMDUtNGRiNS05MDg3LTUzMGUxZTM5N2RmMmVkOTM5ZDM4MGM4OTIx" <>
+        "YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
+        "yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
+
+    path = "test/fixtures/margaret-corbin-grave-west-point.html"
+    document = path |> File.read!() |> Floki.parse_document!()
+
+    expected = %{
+      "site" => "@atlasobscura",
+      "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
+      "card" => "summary_large_image",
+      "image" => image_path,
+      "description" =>
+        "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
+      "site_name" => "Atlas Obscura",
+      "type" => "article",
+      "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
+    }
+    assert TwitterCard.parse(document, %{}) == expected
+  end
+
+  test "takes first founded title in html head if there is html markup error" do
+    path = "test/fixtures/nypd-facial-recognition-children-teenagers4.html"
+    document = path |> File.read!() |> Floki.parse_document!()
+
+    # Compared with `==`: the result must hold exactly these keys, "site" as nil.
+    expected = %{
+      "site" => nil,
+      "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+      "app:id:googleplay" => "com.nytimes.android",
+      "app:name:googleplay" => "NYTimes",
+      "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+      "description" =>
+        "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+      "image" =>
+        "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+      "type" => "article",
+      "url" =>
+        "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+    }
+
+    assert TwitterCard.parse(document, %{}) == expected
+  end
+end