diff options
author | Maxim Filippov <colixer@gmail.com> | 2019-01-01 23:26:40 +0300 |
---|---|---|
committer | Maxim Filippov <colixer@gmail.com> | 2019-01-01 23:26:40 +0300 |
commit | 2aab4e03c3a2867abd4555dc776eebc8b0dba176 (patch) | |
tree | ddb3d4deda02ccae95c8177e3b0edebe966df896 /lib | |
parent | 32bed664714c616d4a75579b450e52dcff5d8134 (diff) | |
download | pleroma-2aab4e03c3a2867abd4555dc776eebc8b0dba176.tar.gz |
Add OGP parser
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pleroma/web/rich_media/data.ex | 3 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser.ex | 14 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/ogp.ex | 30 |
3 files changed, 47 insertions, 0 deletions
# lib/pleroma/web/rich_media/data.ex
defmodule Pleroma.Web.RichMedia.Data do
  @moduledoc """
  Container for the rich media metadata collected by the parsers listed in
  `Pleroma.Web.RichMedia.Parser`.

  Every field defaults to `nil` until a parser fills it in.
  """

  defstruct [:title, :type, :image, :url, :description]
end

# lib/pleroma/web/rich_media/parser.ex
defmodule Pleroma.Web.RichMedia.Parser do
  @moduledoc """
  Fetches a page and runs its body through the configured rich media parsers.
  """

  alias Pleroma.Web.RichMedia.Data

  # Parsers are tried in order; the first one that returns `{:ok, data}` wins.
  @parsers [Pleroma.Web.RichMedia.Parsers.OGP]

  @doc """
  Downloads `url` with `Pleroma.HTTP.get/1` and returns the metadata produced
  by the first parser that succeeds.

  When no parser succeeds, an empty `%Pleroma.Web.RichMedia.Data{}` is returned.

  Raises `MatchError` when the HTTP call does not return
  `{:ok, %Tesla.Env{}}`.
  """
  def parse(url) do
    # Assertive match on purpose: a failed fetch crashes the caller instead of
    # being turned into an empty result.
    {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url)

    Enum.reduce_while(@parsers, %Data{}, fn parser, acc ->
      case parser.parse(html, acc) do
        {:ok, data} -> {:halt, data}
        {:error, _msg} -> {:cont, acc}
      end
    end)
  end
end

# lib/pleroma/web/rich_media/parsers/ogp.ex
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
  @moduledoc """
  Rich media parser that reads `<meta property="og:...">` tags.
  """

  # Lookup from the property name (without the "og:" prefix) to the field it
  # fills. Using a fixed table means no atom is ever created from page content
  # (atoms are not garbage collected) and no unknown key is added to the
  # accumulator.
  @fields %{
    "title" => :title,
    "type" => :type,
    "image" => :image,
    "url" => :url,
    "description" => :description
  }

  @doc """
  Collects the `og:` meta tags found in `html` into `data`.

  Returns `{:ok, data}` when at least one `og:` meta tag is present, with the
  `title`, `type`, `image`, `url` and `description` properties copied into the
  matching keys of `data`. Other `og:` properties, and tags without a
  `content` attribute, are ignored.

  Returns `{:error, "No OGP metadata found"}` when no `og:` meta tag is found.
  """
  def parse(html, data) do
    case get_elements(html) do
      [_ | _] = elements -> {:ok, Enum.reduce(elements, data, &put_property/2)}
      _ -> {:error, "No OGP metadata found"}
    end
  end

  # Selects every <meta> element whose `property` attribute starts with "og:".
  defp get_elements(html), do: Floki.find(html, "meta[property^='og:']")

  # Copies the element's `content` into `acc` when its property is a known field.
  defp put_property({_tag, attributes, _children}, acc) do
    # Only the `property` value loses its "og:" prefix (exactly once); the
    # `content` value is stored untouched.
    with %{"property" => "og:" <> name, "content" => content} <- Map.new(attributes),
         {:ok, field} <- Map.fetch(@fields, name) do
      # Map.put/3 rather than the update syntax so that a plain map passed as
      # `data` keeps working; `field` is always one of the keys in @fields.
      Map.put(acc, field, content)
    else
      _ -> acc
    end
  end
end