diff options
Diffstat (limited to 'test/pleroma/web/rich_media')
-rw-r--r-- | test/pleroma/web/rich_media/helpers_test.exs | 84 | ||||
-rw-r--r-- | test/pleroma/web/rich_media/parser/ttl/aws_signed_url_test.exs | 81 | ||||
-rw-r--r-- | test/pleroma/web/rich_media/parser_test.exs | 176 | ||||
-rw-r--r-- | test/pleroma/web/rich_media/parsers/twitter_card_test.exs | 127 |
4 files changed, 468 insertions, 0 deletions
# ---------------------------------------------------------------------------
# File: test/pleroma/web/rich_media/helpers_test.exs
# ---------------------------------------------------------------------------
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

# Exercises `Helpers.fetch_data_for_activity/1`: which URLs found in a post are
# crawled for rich media and which are refused (an empty map is returned).
defmodule Pleroma.Web.RichMedia.HelpersTest do
  use Pleroma.DataCase

  alias Pleroma.Web.CommonAPI
  alias Pleroma.Web.RichMedia.Helpers

  import Pleroma.Factory
  import Tesla.Mock

  # Route every Tesla request made during a test through HttpRequestMock.
  setup do
    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)

    :ok
  end

  # NOTE(review): presumably restores [:rich_media, :enabled] after each test —
  # confirm against the project's `clear_config` test helper.
  setup do: clear_config([:rich_media, :enabled])

  test "refuses to crawl incomplete URLs" do
    user = insert(:user)

    # The link has no scheme.
    {:ok, activity} =
      CommonAPI.post(user, %{
        status: "[test](example.com/ogp)",
        content_type: "text/markdown"
      })

    # Rich media is switched on only after the post has been created.
    clear_config([:rich_media, :enabled], true)

    assert %{} == Helpers.fetch_data_for_activity(activity)
  end

  test "refuses to crawl malformed URLs" do
    user = insert(:user)

    {:ok, activity} =
      CommonAPI.post(user, %{
        status: "[test](example.com[]/ogp)",
        content_type: "text/markdown"
      })

    clear_config([:rich_media, :enabled], true)

    assert %{} == Helpers.fetch_data_for_activity(activity)
  end

  test "crawls valid, complete URLs" do
    user = insert(:user)

    {:ok, activity} =
      CommonAPI.post(user, %{
        status: "[test](https://example.com/ogp)",
        content_type: "text/markdown"
      })

    clear_config([:rich_media, :enabled], true)

    # Pattern match on purpose: only the page URL is pinned, the parsed
    # rich media payload may be anything.
    assert %{page_url: "https://example.com/ogp", rich_media: _} =
             Helpers.fetch_data_for_activity(activity)
  end

  test "refuses to crawl URLs of private network from posts" do
    user = insert(:user)

    {:ok, activity} =
      CommonAPI.post(user, %{status: "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"})

    {:ok, activity2} = CommonAPI.post(user, %{status: "https://10.111.10.1/notice/9kCP7V"})
    {:ok, activity3} = CommonAPI.post(user, %{status: "https://172.16.32.40/notice/9kCP7V"})
    {:ok, activity4} = CommonAPI.post(user, %{status: "https://192.168.10.40/notice/9kCP7V"})
    {:ok, activity5} = CommonAPI.post(user, %{status: "https://pleroma.local/notice/9kCP7V"})

    clear_config([:rich_media, :enabled], true)

    # Strict equality is required: `assert %{} = value` is a pattern match and
    # succeeds for ANY map, so it would also pass if the URL had been crawled.
    assert %{} == Helpers.fetch_data_for_activity(activity)
    assert %{} == Helpers.fetch_data_for_activity(activity2)
    assert %{} == Helpers.fetch_data_for_activity(activity3)
    assert %{} == Helpers.fetch_data_for_activity(activity4)
    assert %{} == Helpers.fetch_data_for_activity(activity5)
  end
end

# ---------------------------------------------------------------------------
# File: test/pleroma/web/rich_media/parser/ttl/aws_signed_url_test.exs
# ---------------------------------------------------------------------------
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

# Checks that the expiry encoded in an AWS signed image URL (X-Amz-Date plus
# X-Amz-Expires) is returned by `AwsSignedUrl.ttl/2` and is applied to the
# `:rich_media_cache` entry by `Parser.set_ttl_based_on_image/2`.
defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrlTest do
  # Relies on Cachex, needs to be synchronous
  use Pleroma.DataCase

  test "s3 signed url is parsed correct for expiration time" do
    url = "https://pleroma.social/amz"

    {:ok, timestamp} =
      Timex.now()
      |> DateTime.truncate(:second)
      |> Timex.format("{ISO:Basic:Z}")

    # in seconds
    valid_till = 30

    metadata = construct_metadata(timestamp, valid_till, url)

    # Expected expiry: signing time as a unix timestamp plus the validity window.
    expire_time =
      timestamp
      |> Timex.parse!("{ISO:Basic:Z}")
      |> Timex.to_unix()
      |> Kernel.+(valid_till)

    assert {:ok, expire_time} == Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.ttl(metadata, url)
  end

  test "s3 signed url is parsed and correct ttl is set for rich media" do
    url = "https://pleroma.social/amz"

    {:ok, timestamp} =
      Timex.now()
      |> DateTime.truncate(:second)
      |> Timex.format("{ISO:Basic:Z}")

    # in seconds
    valid_till = 30

    metadata = construct_metadata(timestamp, valid_till, url)

    body = """
    <meta name="twitter:card" content="Pleroma" />
    <meta name="twitter:site" content="Pleroma" />
    <meta name="twitter:title" content="Pleroma" />
    <meta name="twitter:description" content="Pleroma" />
    <meta name="twitter:image" content="#{Map.get(metadata, :image)}" />
    """

    Tesla.Mock.mock(fn
      %{
        method: :get,
        url: "https://pleroma.social/amz"
      } ->
        %Tesla.Env{status: 200, body: body}
    end)

    # Seed the cache entry whose TTL is expected to be adjusted below.
    Cachex.put(:rich_media_cache, url, metadata)

    Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image(metadata, url)

    {:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)

    # as there is delay in setting and pulling the data from cache we ignore 1 second
    # make it 2 seconds for flakiness
    assert_in_delta(valid_till * 1000, cache_ttl, 2000)
  end

  # Builds a signed S3 image URL with `timestamp` as X-Amz-Date and
  # `valid_till` as X-Amz-Expires; every other query parameter is fixed.
  defp construct_s3_url(timestamp, valid_till) do
    "https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{timestamp}&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host"
  end

  # Rich media metadata map whose `:image` is the signed S3 URL.
  defp construct_metadata(timestamp, valid_till, url) do
    %{
      image: construct_s3_url(timestamp, valid_till),
      site: "Pleroma",
      title: "Pleroma",
      description: "Pleroma",
      url: url
    }
  end
end

# ---------------------------------------------------------------------------
# File: test/pleroma/web/rich_media/parser_test.exs
# ---------------------------------------------------------------------------
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

# Exercises `Parser.parse/1` against canned HTTP responses: OGP, Twitter card
# and OEmbed pages, pages without usable metadata, transport errors, and the
# size / content-type checks answered by HEAD requests.
defmodule Pleroma.Web.RichMedia.ParserTest do
  use ExUnit.Case, async: true

  alias Pleroma.Web.RichMedia.Parser

  # Each mocked GET URL is answered from a fixture (or an inline body / error);
  # HEAD is answered for the two size/content-type cases and with 404 otherwise.
  setup do
    Tesla.Mock.mock(fn
      %{
        method: :get,
        url: "http://example.com/ogp"
      } ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}

      %{
        method: :get,
        url: "http://example.com/non-ogp"
      } ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}

      %{
        method: :get,
        url: "http://example.com/ogp-missing-title"
      } ->
        %Tesla.Env{
          status: 200,
          body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
        }

      %{
        method: :get,
        url: "http://example.com/twitter-card"
      } ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}

      %{
        method: :get,
        url: "http://example.com/oembed"
      } ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}

      %{
        method: :get,
        url: "http://example.com/oembed.json"
      } ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}

      %{method: :get, url: "http://example.com/empty"} ->
        %Tesla.Env{status: 200, body: "hello"}

      %{method: :get, url: "http://example.com/malformed"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}

      %{method: :get, url: "http://example.com/error"} ->
        {:error, :overload}

      %{
        method: :head,
        url: "http://example.com/huge-page"
      } ->
        %Tesla.Env{
          status: 200,
          headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
        }

      %{
        method: :head,
        url: "http://example.com/pdf-file"
      } ->
        %Tesla.Env{
          status: 200,
          headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
        }

      # Catch-all for HEAD requests to any other URL.
      %{method: :head} ->
        %Tesla.Env{status: 404, body: "", headers: []}
    end)

    :ok
  end

  test "returns error when no metadata present" do
    assert {:error, _} = Parser.parse("http://example.com/empty")
  end

  test "doesn't just add a title" do
    assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp")
  end

  test "parses ogp" do
    assert Parser.parse("http://example.com/ogp") ==
             {:ok,
              %{
                "image" => "http://ia.media-imdb.com/images/rock.jpg",
                "title" => "The Rock",
                "description" =>
                  "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                "type" => "video.movie",
                "url" => "http://example.com/ogp"
              }}
  end

  test "falls back to <title> when ogp:title is missing" do
    assert Parser.parse("http://example.com/ogp-missing-title") ==
             {:ok,
              %{
                "image" => "http://ia.media-imdb.com/images/rock.jpg",
                "title" => "The Rock (1996)",
                "description" =>
                  "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                "type" => "video.movie",
                "url" => "http://example.com/ogp-missing-title"
              }}
  end

  test "parses twitter card" do
    assert Parser.parse("http://example.com/twitter-card") ==
             {:ok,
              %{
                "card" => "summary",
                "site" => "@flickr",
                "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
                "title" => "Small Island Developing States Photo Submission",
                "description" => "View the album on Flickr.",
                "url" => "http://example.com/twitter-card"
              }}
  end

  test "parses OEmbed" do
    assert Parser.parse("http://example.com/oembed") ==
             {:ok,
              %{
                "author_name" => "bees",
                "author_url" => "https://www.flickr.com/photos/bees/",
                "cache_age" => 3600,
                "flickr_type" => "photo",
                "height" => "768",
                "html" =>
                  "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by bees, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
                "license" => "All Rights Reserved",
                "license_id" => 0,
                "provider_name" => "Flickr",
                "provider_url" => "https://www.flickr.com/",
                "thumbnail_height" => 150,
                "thumbnail_url" =>
                  "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
                "thumbnail_width" => 150,
                "title" => "Bacon Lollys",
                "type" => "photo",
                "url" => "http://example.com/oembed",
                "version" => "1.0",
                "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
                "web_page_short_url" => "https://flic.kr/p/4AK2sc",
                "width" => "1024"
              }}
  end

  test "rejects invalid OGP data" do
    assert {:error, _} = Parser.parse("http://example.com/malformed")
  end

  test "returns error if getting page was not successful" do
    assert {:error, :overload} = Parser.parse("http://example.com/error")
  end

  test "does a HEAD request to check if the body is too large" do
    assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page")
  end

  test "does a HEAD request to check if the body is html" do
    assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file")
  end
end

# ---------------------------------------------------------------------------
# File: test/pleroma/web/rich_media/parsers/twitter_card_test.exs
# ---------------------------------------------------------------------------
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

# Exercises `TwitterCard.parse/2` on Floki-parsed HTML fixtures, starting from
# an empty accumulator map.
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
  use ExUnit.Case, async: true
  alias Pleroma.Web.RichMedia.Parsers.TwitterCard

  test "returns error when html not contains twitter card" do
    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
  end

  test "parses twitter card with only name attributes" do
    html =
      "test/fixtures/nypd-facial-recognition-children-teenagers3.html"
      |> File.read!()
      |> Floki.parse_document!()

    assert TwitterCard.parse(html, %{}) ==
             %{
               "app:id:googleplay" => "com.nytimes.android",
               "app:name:googleplay" => "NYTimes",
               "app:url:googleplay" => "nytimes://reader/id/100000006583622",
               "site" => nil,
               "description" =>
                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
               "image" =>
                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
               "type" => "article",
               "url" =>
                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
               "title" =>
                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
             }
  end

  test "parses twitter card with only property attributes" do
    html =
      "test/fixtures/nypd-facial-recognition-children-teenagers2.html"
      |> File.read!()
      |> Floki.parse_document!()

    assert TwitterCard.parse(html, %{}) ==
             %{
               "card" => "summary_large_image",
               "description" =>
                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
               "image" =>
                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
               "image:alt" => "",
               "title" =>
                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
               "url" =>
                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
               "type" => "article"
             }
  end

  test "parses twitter card with name & property attributes" do
    html =
      "test/fixtures/nypd-facial-recognition-children-teenagers.html"
      |> File.read!()
      |> Floki.parse_document!()

    assert TwitterCard.parse(html, %{}) ==
             %{
               "app:id:googleplay" => "com.nytimes.android",
               "app:name:googleplay" => "NYTimes",
               "app:url:googleplay" => "nytimes://reader/id/100000006583622",
               "card" => "summary_large_image",
               "description" =>
                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
               "image" =>
                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
               "image:alt" => "",
               "site" => nil,
               "title" =>
                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
               "url" =>
                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
               "type" => "article"
             }
  end

  test "respect only first title tag on the page" do
    image_path =
      "https://assets.atlasobscura.com/media/W1siZiIsInVwbG9hZHMvYXNzZXRzLzkwYzgyMzI4LThlMDUtNGRiNS05MDg3LTUzMGUxZTM5N2RmMmVkOTM5ZDM4MGM4OTIx" <>
        "YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
        "yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"

    html =
      "test/fixtures/margaret-corbin-grave-west-point.html"
      |> File.read!()
      |> Floki.parse_document!()

    assert TwitterCard.parse(html, %{}) ==
             %{
               "site" => "@atlasobscura",
               "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
               "card" => "summary_large_image",
               "image" => image_path,
               "description" =>
                 "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
               "site_name" => "Atlas Obscura",
               "type" => "article",
               "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
             }
  end

  test "takes first founded title in html head if there is html markup error" do
    html =
      "test/fixtures/nypd-facial-recognition-children-teenagers4.html"
      |> File.read!()
      |> Floki.parse_document!()

    assert TwitterCard.parse(html, %{}) ==
             %{
               "site" => nil,
               "title" =>
                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
               "app:id:googleplay" => "com.nytimes.android",
               "app:name:googleplay" => "NYTimes",
               "app:url:googleplay" => "nytimes://reader/id/100000006583622",
               "description" =>
                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
               "image" =>
                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
               "type" => "article",
               "url" =>
                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
             }
  end
end