aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Felder <feld@feld.me> 2021-06-11 14:33:13 -0500
committer Mark Felder <feld@feld.me> 2021-06-11 14:35:05 -0500
commit baf7fd2142a28ff2219c6be8906861490a9f0753 (patch)
tree 673143ce191f70fddb6067ef50bb75e60bfe7a52
parent 65137044c1d68eade974e5b5ac5caea4352e6a44 (diff)
download pleroma-baf7fd2142a28ff2219c6be8906861490a9f0753.tar.gz
Introduce a scrubber that strips everything except line breaks
Add more tests for scrub_html_and_truncate/2
-rw-r--r-- lib/pleroma/web/metadata/utils.ex | 4
-rw-r--r-- priv/scrubbers/breaks_only.ex | 15
-rw-r--r-- test/pleroma/web/metadata/utils_test.exs | 16
3 files changed, 34 insertions, 1 deletions
diff --git a/lib/pleroma/web/metadata/utils.ex b/lib/pleroma/web/metadata/utils.ex
index 3c0dfb6ad..a519fbf01 100644
--- a/lib/pleroma/web/metadata/utils.ex
+++ b/lib/pleroma/web/metadata/utils.ex
@@ -6,6 +6,7 @@ defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Activity
alias Pleroma.Emoji
alias Pleroma.Formatter
+ alias Pleroma.HTML
def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do
content
@@ -21,8 +22,9 @@ defmodule Pleroma.Web.Metadata.Utils do
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
content
|> Emoji.Formatter.demojify()
+ |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
|> HtmlEntities.decode()
- |> String.replace(~r/<br\s?\/?>/, " ")
+ |> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
|> Formatter.truncate(max_length)
end
diff --git a/priv/scrubbers/breaks_only.ex b/priv/scrubbers/breaks_only.ex
new file mode 100644
index 000000000..f952c7224
--- /dev/null
+++ b/priv/scrubbers/breaks_only.ex
@@ -0,0 +1,15 @@
+defmodule Pleroma.HTML.Scrubber.BreaksOnly do
+ @moduledoc """
+ An HTML scrubbing policy which strips comments and every tag except line breaks (`<br>`).
+ """
+
+ require FastSanitize.Sanitizer.Meta
+ alias FastSanitize.Sanitizer.Meta
+
+ Meta.strip_comments()
+
+ # linebreaks only
+ Meta.allow_tag_with_these_attributes(:br, [])
+
+ Meta.strip_everything_not_covered()
+end
diff --git a/test/pleroma/web/metadata/utils_test.exs b/test/pleroma/web/metadata/utils_test.exs
index 8cdfb8ecc..b5d3e3566 100644
--- a/test/pleroma/web/metadata/utils_test.exs
+++ b/test/pleroma/web/metadata/utils_test.exs
@@ -30,5 +30,21 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
test "it truncates to specified chars (binaries)" do
assert Utils.scrub_html_and_truncate("Pleroma's really cool!", 10) == "Pleroma..."
end
+
+ # push notifications and link previews should be able to display newlines
+ test "it replaces <br> with compatible HTML entity (binaries)" do
+ assert Utils.scrub_html_and_truncate("First line<br>Second line") ==
+ "First line&#10;&#13;Second line"
+ end
+
+ test "it strips emojis (binaries)" do
+ assert Utils.scrub_html_and_truncate(
+ "Open the door get on the floor everybody walk the dinosaur :dinosaur:"
+ ) == "Open the door get on the floor everybody walk the dinosaur"
+ end
+
+ test "it strips HTML tags and other entities (binaries)" do
+ assert Utils.scrub_html_and_truncate("<title>my title</title> <p>and a paragraph&#33;</p>") == "my title and a paragraph!"
+ end
end
end