diff options
author | Mark Felder <feld@feld.me> | 2021-06-11 14:33:13 -0500 |
---|---|---|
committer | Mark Felder <feld@feld.me> | 2021-06-11 14:35:05 -0500 |
commit | baf7fd2142a28ff2219c6be8906861490a9f0753 (patch) | |
tree | 673143ce191f70fddb6067ef50bb75e60bfe7a52 | |
parent | 65137044c1d68eade974e5b5ac5caea4352e6a44 (diff) | |
download | pleroma-baf7fd2142a28ff2219c6be8906861490a9f0753.tar.gz |
Introduce a scrubber that strips everything except line breaks
Add more tests for scrub_html_and_truncate/2
-rw-r--r-- | lib/pleroma/web/metadata/utils.ex | 4 | ||||
-rw-r--r-- | priv/scrubbers/breaks_only.ex | 15 | ||||
-rw-r--r-- | test/pleroma/web/metadata/utils_test.exs | 16 |
3 files changed, 34 insertions, 1 deletion
diff --git a/lib/pleroma/web/metadata/utils.ex b/lib/pleroma/web/metadata/utils.ex index 3c0dfb6ad..a519fbf01 100644 --- a/lib/pleroma/web/metadata/utils.ex +++ b/lib/pleroma/web/metadata/utils.ex @@ -6,6 +6,7 @@ defmodule Pleroma.Web.Metadata.Utils do alias Pleroma.Activity alias Pleroma.Emoji alias Pleroma.Formatter + alias Pleroma.HTML def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do content @@ -21,8 +22,9 @@ defmodule Pleroma.Web.Metadata.Utils do def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do content |> Emoji.Formatter.demojify() + |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly) |> HtmlEntities.decode() - |> String.replace(~r/<br\s?\/?>/, " ") + |> String.replace(~r/<br\s?\/?>/, " ") |> Formatter.truncate(max_length) end diff --git a/priv/scrubbers/breaks_only.ex b/priv/scrubbers/breaks_only.ex new file mode 100644 index 000000000..f952c7224 --- /dev/null +++ b/priv/scrubbers/breaks_only.ex @@ -0,0 +1,15 @@ +defmodule Pleroma.HTML.Scrubber.BreaksOnly do + @moduledoc """ + An HTML scrubbing policy which limits to linebreaks only. + """ + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + Meta.strip_comments() + + # linebreaks only + Meta.allow_tag_with_these_attributes(:br, []) + + Meta.strip_everything_not_covered() +end diff --git a/test/pleroma/web/metadata/utils_test.exs b/test/pleroma/web/metadata/utils_test.exs index 8cdfb8ecc..b5d3e3566 100644 --- a/test/pleroma/web/metadata/utils_test.exs +++ b/test/pleroma/web/metadata/utils_test.exs @@ -30,5 +30,21 @@ defmodule Pleroma.Web.Metadata.UtilsTest do test "it truncates to specified chars (binaries)" do assert Utils.scrub_html_and_truncate("Pleroma's really cool!", 10) == "Pleroma..." 
end + + # push notifications and link previews should be able to display newlines + test "it replaces <br> with compatible HTML entity (binaries)" do + assert Utils.scrub_html_and_truncate("First line<br>Second line") == + "First line Second line" + end + + test "it strips emojis (binaries)" do + assert Utils.scrub_html_and_truncate( + "Open the door get on the floor everybody walk the dinosaur :dinosaur:" + ) == "Open the door get on the floor everybody walk the dinosaur" + end + + test "it strips HTML tags and other entities (binaries)" do + assert Utils.scrub_html_and_truncate("<title>my title</title> <p>and a paragraph!</p>") == "my title and a paragraph!" + end end end |