1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser do
@options [
pool: :media,
max_body: 2_000_000
]
defp parsers do
Pleroma.Config.get([:rich_media, :parsers])
end
def parse(nil), do: {:error, "No URL provided"}
if Pleroma.Config.get(:env) == :test do
def parse(url), do: parse_url(url)
else
def parse(url) do
try do
Cachex.fetch!(:rich_media_cache, url, fn _ ->
{:commit, parse_url(url)}
end)
|> set_ttl_based_on_image(url)
rescue
e ->
{:error, "Cachex error: #{inspect(e)}"}
end
end
end
@doc """
Set the rich media cache based on the expiration time of image.
Adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
## Example
defmodule MyModule do
@behaviour Pleroma.Web.RichMedia.Parser.TTL
def ttl(data, url) do
image_url = Map.get(data, :image)
# do some parsing in the url and get the ttl of the image
# and return ttl is unix time
parse_ttl_from_url(image_url)
end
end
Define the module in the config
config :pleroma, :rich_media,
ttl_setters: [MyModule]
"""
def set_ttl_based_on_image({:ok, data}, url) do
with {:ok, nil} <- Cachex.ttl(:rich_media_cache, url),
ttl when is_number(ttl) <- get_ttl_from_image(data, url) do
Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
{:ok, data}
else
_ ->
{:ok, data}
end
end
defp get_ttl_from_image(data, url) do
Pleroma.Config.get([:rich_media, :ttl_setters])
|> Enum.reduce({:ok, nil}, fn
module, {:ok, _ttl} ->
module.ttl(data, url)
_, error ->
error
end)
end
defp parse_url(url) do
opts =
if Application.get_env(:tesla, :adapter) == Tesla.Adapter.Hackney do
Keyword.merge(@options,
recv_timeout: 2_000,
with_body: true
)
else
@options
end
try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: opts)
html
|> parse_html()
|> maybe_parse()
|> Map.put(:url, url)
|> clean_parsed_data()
|> check_parsed_data()
rescue
e ->
{:error, "Parsing error: #{inspect(e)} #{inspect(__STACKTRACE__)}"}
end
end
defp parse_html(html), do: Floki.parse_document!(html)
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
{:ok, data} -> {:halt, data}
{:error, _msg} -> {:cont, acc}
end
end)
end
defp check_parsed_data(%{title: title} = data)
when is_binary(title) and byte_size(title) > 0 do
{:ok, data}
end
defp check_parsed_data(data) do
{:error, "Found metadata was invalid or incomplete: #{inspect(data)}"}
end
defp clean_parsed_data(data) do
data
|> Enum.reject(fn {key, val} ->
with {:ok, _} <- Jason.encode(%{key => val}) do
false
else
_ -> true
end
end)
|> Map.new()
end
end
|