Introduce event language detection

Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
Thomas Citharel 2021-08-19 20:43:35 +02:00
parent 7c9b76765f
commit d577b07c6e
No known key found for this signature in database
GPG key ID: A061B9DDE0CA0773
16 changed files with 206 additions and 15 deletions

View file

@ -10,6 +10,7 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Comments do
alias Mobilizon.Federation.ActivityStream.Convertible
alias Mobilizon.GraphQL.API.Utils, as: APIUtils
alias Mobilizon.Service.Activity.Comment, as: CommentActivity
alias Mobilizon.Service.LanguageDetection
alias Mobilizon.Share
alias Mobilizon.Tombstone
alias Mobilizon.Web.Endpoint
@ -127,6 +128,7 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Comments do
),
tags <- ConverterUtils.fetch_tags(tags),
mentions <- Map.get(args, :mentions, []) ++ ConverterUtils.fetch_mentions(mentions),
lang <- Map.get(args, :language, "und"),
args <-
Map.merge(args, %{
actor_id: Map.get(args, :actor_id),
@ -141,7 +143,8 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Comments do
if(is_nil(in_reply_to_comment),
do: nil,
else: Comment.get_thread_id(in_reply_to_comment)
)
),
language: if(lang == "und", do: LanguageDetection.detect(:comment, args), else: lang)
}) do
args
end

View file

@ -12,6 +12,7 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Events do
alias Mobilizon.GraphQL.API.Utils, as: APIUtils
alias Mobilizon.Service.Activity.Event, as: EventActivity
alias Mobilizon.Service.Formatter.HTML
alias Mobilizon.Service.LanguageDetection
alias Mobilizon.Service.Notifications.Scheduler
alias Mobilizon.Share
alias Mobilizon.Tombstone
@ -234,6 +235,10 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Events do
args
|> Map.put(:options, options)
|> Map.put_new(:language, "und")
|> Map.update!(:language, fn lang ->
if lang == "und", do: LanguageDetection.detect(:event, args), else: lang
end)
|> Map.update(:tags, [], &ConverterUtils.fetch_tags/1)
|> Map.update(:contacts, [], &ConverterUtils.fetch_actors/1)
end

View file

@ -8,6 +8,7 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Posts do
alias Mobilizon.Federation.ActivityStream.Convertible
alias Mobilizon.Posts.Post
alias Mobilizon.Service.Activity.Post, as: PostsActivity
alias Mobilizon.Service.LanguageDetection
require Logger
import Mobilizon.Federation.ActivityPub.Utils, only: [make_create_data: 2, make_update_data: 2]
@ -17,7 +18,7 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Posts do
@impl Entity
def create(args, additional) do
with args <- Map.update(args, :tags, [], &ConverterUtils.fetch_tags/1),
with args <- prepare_args(args),
{:ok, %Post{attributed_to_id: group_id, author_id: creator_id} = post} <-
Posts.create_post(args),
{:ok, _} <- PostsActivity.insert_activity(post, subject: "post_created"),
@ -37,7 +38,7 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Posts do
@impl Entity
def update(%Post{} = post, args, additional) do
with args <- Map.update(args, :tags, [], &ConverterUtils.fetch_tags/1),
with args <- prepare_args(args),
{:ok, %Post{attributed_to_id: group_id, author_id: creator_id} = post} <-
Posts.update_post(post, args),
{:ok, _} <- PostsActivity.insert_activity(post, subject: "post_updated"),
@ -99,4 +100,13 @@ defmodule Mobilizon.Federation.ActivityPub.Types.Posts do
delete: :moderator
}
end
# Normalizes the arguments given to `create/2` and `update/3` before they reach
# the `Posts` context: tags are resolved through `ConverterUtils.fetch_tags/1`
# and a `:language` entry is always set.
#
# A missing, `nil` or "und" (undetermined) language triggers detection on the
# original arguments. `nil` is handled explicitly because `Map.put_new/3` would
# keep an explicit `language: nil`, which would then be stored as-is instead of
# falling back to detection.
defp prepare_args(args) do
  language =
    case Map.get(args, :language) do
      lang when lang in [nil, "und"] -> LanguageDetection.detect(:post, args)
      lang -> lang
    end

  args
  |> Map.update(:tags, [], &ConverterUtils.fetch_tags/1)
  |> Map.put(:language, language)
end
end

View file

@ -36,6 +36,7 @@ defmodule Mobilizon.Federation.ActivityPub.Utils do
"https://www.w3.org/ns/activitystreams",
"https://litepub.social/context.jsonld",
%{
"@language" => "und",
"sc" => "http://schema.org#",
"ical" => "http://www.w3.org/2002/12/cal/ical#",
"pt" => "https://joinpeertube.org/ns#",
@ -91,7 +92,8 @@ defmodule Mobilizon.Federation.ActivityPub.Utils do
},
"PropertyValue" => "sc:PropertyValue",
"value" => "sc:value",
"propertyID" => "sc:propertyID"
"propertyID" => "sc:propertyID",
"inLanguage" => "sc:inLanguage"
}
]
}

View file

@ -83,7 +83,8 @@ defmodule Mobilizon.Federation.ActivityStream.Converter.Event do
mentions: mentions,
physical_address_id: address_id,
updated_at: object["updated"],
publish_at: object["published"]
publish_at: object["published"],
language: object["inLanguage"]
}
else
{:ok, %Actor{suspended: true}} ->
@ -128,7 +129,8 @@ defmodule Mobilizon.Federation.ActivityStream.Converter.Event do
"draft" => event.draft,
"ical:status" => event.status |> to_string |> String.upcase(),
"id" => event.url,
"url" => event.url
"url" => event.url,
"inLanguage" => event.language
}
|> maybe_add_physical_address(event)
|> maybe_add_event_picture(event)

View file

@ -54,6 +54,8 @@ defmodule Mobilizon.GraphQL.Schema.Discussions.CommentType do
field(:is_announcement, non_null(:boolean),
description: "Whether this comment needs to be announced to participants"
)
field(:language, non_null(:string), description: "The comment language")
end
@desc "The list of visibility options for a comment"
@ -89,6 +91,7 @@ defmodule Mobilizon.GraphQL.Schema.Discussions.CommentType do
arg(:text, non_null(:string), description: "The comment's body")
arg(:event_id, non_null(:id), description: "The event under which this comment is")
arg(:in_reply_to_comment_id, :id, description: "The comment ID this one replies to")
arg(:language, :string, description: "The comment language", default_value: "und")
arg(:is_announcement, :boolean, description: "Should this comment be announced to everyone?")
@ -99,6 +102,7 @@ defmodule Mobilizon.GraphQL.Schema.Discussions.CommentType do
field :update_comment, type: :comment do
arg(:text, non_null(:string), description: "The comment updated body")
arg(:comment_id, non_null(:id), description: "The comment ID")
arg(:language, :string, description: "The comment language", default_value: "und")
arg(:is_announcement, :boolean, description: "Should this comment be announced to everyone?")

View file

@ -104,6 +104,7 @@ defmodule Mobilizon.GraphQL.Schema.EventType do
field(:inserted_at, :datetime, description: "When the event was created")
field(:options, :event_options, description: "The event options")
field(:metadata, list_of(:event_metadata), description: "A key-value list of metadata")
field(:language, non_null(:string), description: "The event language")
end
@desc "The list of visibility options for an event"
@ -401,6 +402,7 @@ defmodule Mobilizon.GraphQL.Schema.EventType do
)
arg(:contacts, list_of(:contact), default_value: [], description: "The events contacts")
arg(:language, :string, description: "The event language", default_value: "und")
resolve(&Event.create_event/3)
end
@ -444,6 +446,7 @@ defmodule Mobilizon.GraphQL.Schema.EventType do
arg(:metadata, list_of(:event_metadata_input), description: "The event metadata")
arg(:draft, :boolean, description: "Whether or not the event is a draft")
arg(:contacts, list_of(:contact), default_value: [], description: "The events contacts")
arg(:language, :string, description: "The event language", default_value: "und")
resolve(&Event.update_event/3)
end

View file

@ -20,6 +20,7 @@ defmodule Mobilizon.GraphQL.Schema.PostType do
field(:publish_at, :datetime, description: "When the post was published")
field(:inserted_at, :datetime, description: "The post's creation date")
field(:updated_at, :datetime, description: "The post's last update date")
field(:language, non_null(:string), description: "The post language")
field(:tags, list_of(:tag),
resolve: &Tag.list_tags_for_post/3,
@ -71,6 +72,7 @@ defmodule Mobilizon.GraphQL.Schema.PostType do
arg(:draft, :boolean, default_value: false, description: "Whether the post is a draft")
arg(:visibility, :post_visibility, description: "The post's visibility")
arg(:publish_at, :datetime, description: "The post's publish date")
arg(:language, :string, description: "The post language", default_value: "und")
arg(:tags, list_of(:string),
default_value: [],
@ -93,6 +95,7 @@ defmodule Mobilizon.GraphQL.Schema.PostType do
arg(:attributed_to_id, :id, description: "The group the post is attributed to")
arg(:draft, :boolean, description: "Whether the post is a draft")
arg(:visibility, :post_visibility, description: "The post's visibility")
arg(:language, :string, description: "The post language", default_value: "und")
arg(:publish_at, :datetime,
description: "The time when the posts is going to be or has been published"

View file

@ -30,7 +30,8 @@ defmodule Mobilizon.Discussions.Comment do
mentions: [Mention.t()],
media: [Media.t()],
in_reply_to_comment: t,
origin_comment: t
origin_comment: t,
language: String.t()
}
# When deleting an event we only nihilify everything
@ -46,7 +47,8 @@ defmodule Mobilizon.Discussions.Comment do
:deleted_at,
:local,
:is_announcement,
:discussion_id
:discussion_id,
:language
]
@attrs @required_attrs ++ @optional_attrs
@ -60,6 +62,7 @@ defmodule Mobilizon.Discussions.Comment do
field(:deleted_at, :utc_datetime)
field(:published_at, :utc_datetime)
field(:is_announcement, :boolean, default: false)
field(:language, :string, default: "und")
belongs_to(:actor, Actor, foreign_key: :actor_id)
belongs_to(:attributed_to, Actor, foreign_key: :attributed_to_id)

View file

@ -62,7 +62,8 @@ defmodule Mobilizon.Events.Event do
mentions: [Mention.t()],
tags: [Tag.t()],
participants: [Actor.t()],
contacts: [Actor.t()]
contacts: [Actor.t()],
language: String.t()
}
@update_required_attrs [:title, :begins_on, :organizer_actor_id]
@ -83,7 +84,8 @@ defmodule Mobilizon.Events.Event do
:phone_address,
:picture_id,
:physical_address_id,
:attributed_to_id
:attributed_to_id,
:language
]
@attrs @required_attrs ++ @optional_attrs
@ -106,6 +108,7 @@ defmodule Mobilizon.Events.Event do
field(:online_address, :string)
field(:phone_address, :string)
field(:category, :string)
field(:language, :string, default: "und")
embeds_one(:options, EventOptions, on_replace: :delete)
embeds_one(:participant_stats, EventParticipantStats, on_replace: :update)

View file

@ -43,7 +43,8 @@ defmodule Mobilizon.Posts.Post do
attributed_to: Actor.t(),
picture: Media.t(),
media: [Media.t()],
tags: [Tag.t()]
tags: [Tag.t()],
language: String.t()
}
@primary_key {:id, Ecto.UUID, autogenerate: true}
@ -57,6 +58,7 @@ defmodule Mobilizon.Posts.Post do
field(:url, :string)
field(:publish_at, :utc_datetime)
field(:visibility, PostVisibility, default: :public)
field(:language, :string, default: "und")
belongs_to(:author, Actor)
belongs_to(:attributed_to, Actor)
belongs_to(:picture, Media, on_replace: :update)
@ -76,7 +78,7 @@ defmodule Mobilizon.Posts.Post do
:author_id,
:attributed_to_id
]
@optional_attrs [:picture_id, :local, :publish_at, :visibility]
@optional_attrs [:picture_id, :local, :publish_at, :visibility, :language]
@attrs @required_attrs ++ @optional_attrs
@doc false

View file

@ -0,0 +1,84 @@
defmodule Mobilizon.Service.LanguageDetection do
  @moduledoc """
  Detect the language of user-provided content (events, comments and posts).

  Detection is delegated to `Paasaa`, restricted to the Paasaa languages whose
  normalized code is one of the locales known to `Mobilizon.Cldr`. Whenever no
  language can be determined, `"und"` (undetermined) is returned.
  """

  alias Mobilizon.Service.Formatter.HTML

  @und "und"

  @paasaa_languages Paasaa.Data.languages()
                    |> Map.values()
                    |> List.flatten()
                    |> Enum.map(fn {lang, _val} ->
                      lang
                    end)

  @allow_listed_locales Mobilizon.Cldr.known_locale_names()

  @type entity_type :: :event | :comment | :post

  @doc """
  Detects the language of an entity from its textual attributes.

    * `:event` uses the `:title` and, when present and non-empty, the `:description`
    * `:comment` uses the `:text`
    * `:post` uses the `:title` and, when present and non-empty, the `:body`

  HTML content (description, text, body) is stripped of its tags before detection.

  Returns `"und"` when the expected attribute is missing or is not a string, or
  for any other entity type.
  """
  @spec detect(entity_type(), map()) :: String.t()
  def detect(:event, %{title: title} = args) when is_binary(title) do
    detect_from_title_and_body(title, Map.get(args, :description))
  end

  def detect(:comment, %{text: text}) when is_binary(text) do
    text
    |> HTML.strip_tags_and_insert_spaces()
    |> detect_from_text()
  end

  def detect(:post, %{title: title} = args) when is_binary(title) do
    detect_from_title_and_body(title, Map.get(args, :body))
  end

  # Missing or non-string title/text (e.g. a partial update): nothing to detect from.
  def detect(_, _), do: @und

  @doc """
  Normalizes a language code into the language subtag parsed by `Cldr.AcceptLanguage`.

  Returns `"und"` for an empty string or when parsing does not yield exactly one
  language tag.
  """
  @spec normalize(String.t()) :: String.t()
  def normalize(""), do: @und

  def normalize(language) do
    case Cldr.AcceptLanguage.parse(language, Mobilizon.Cldr) do
      {:ok, [{_, tag}]} ->
        tag.language

      _ ->
        @und
    end
  end

  @doc """
  Lists the Paasaa language codes whose normalized form is a locale known to
  `Mobilizon.Cldr`. This list is passed to Paasaa as its `:whitelist` option.
  """
  @spec allow_listed_languages :: [String.t()]
  def allow_listed_languages do
    # Keyed by normalized code: when several Paasaa codes normalize to the same
    # value, the last one wins (same as collecting the pairs into a map).
    @paasaa_languages
    |> Map.new(fn lang -> {normalize(lang), lang} end)
    |> Map.take(@allow_listed_locales)
    |> Map.values()
  end

  # Detects from the title alone when there is no body, otherwise from the title
  # followed by the body stripped of its HTML tags.
  @spec detect_from_title_and_body(String.t(), String.t() | nil) :: String.t()
  defp detect_from_title_and_body(title, body) when body in [nil, ""] do
    detect_from_text(title)
  end

  defp detect_from_title_and_body(title, body) do
    sanitized_body = HTML.strip_tags_and_insert_spaces(body)
    detect_from_text("#{title}\n\n#{sanitized_body}")
  end

  # Runs Paasaa on plain text and normalizes the detected code.
  @spec detect_from_text(String.t()) :: String.t()
  defp detect_from_text(text) do
    text
    |> Paasaa.detect(whitelist: allow_listed_languages())
    |> normalize()
  end
end

View file

@ -165,6 +165,7 @@ defmodule Mobilizon.Mixfile do
ref: "4361bd02b0b7f2cb5f8ac302bcd1210b57964a51"},
{:eblurhash, "~> 1.2"},
{:struct_access, "~> 1.1.2"},
{:paasaa, "~> 0.5.0"},
# Dev and test dependencies
{:phoenix_live_reload, "~> 1.2", only: [:dev, :e2e]},
{:ex_machina, "~> 2.3", only: [:dev, :test]},

View file

@ -100,6 +100,7 @@
"oauth2": {:hex, :oauth2, "2.0.0", "338382079fe16c514420fa218b0903f8ad2d4bfc0ad0c9f988867dfa246731b0", [:mix], [{:hackney, "~> 1.13", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "881b8364ac7385f9fddc7949379cbe3f7081da37233a1aa7aab844670a91e7e7"},
"oauther": {:hex, :oauther, "1.1.1", "7d8b16167bb587ecbcddd3f8792beb9ec3e7b65c1f8ebd86b8dd25318d535752", [:mix], [], "hexpm", "9374f4302045321874cccdc57eb975893643bd69c3b22bf1312dab5f06e5788e"},
"oban": {:hex, :oban, "2.8.0", "e44b19a30e30bb983099f55d59749316ff0eaf5dfef4214e1190738176653e50", [:mix], [{:ecto_sql, ">= 3.4.3", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.14", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2954a2ac418f7cc4217c0772a3dd3a70e2966240583b97f4126a489e1300a573"},
"paasaa": {:hex, :paasaa, "0.5.1", "58d8bf61902adfd1d04815a115f0eb3b996845c0360f1831854e21073411e822", [:mix], [], "hexpm", "571f1a33b8e184396a93fc18ee5331f2655c96ba9a6fc383dc675e4bc8fc7596"},
"parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"},
"phoenix": {:hex, :phoenix, "1.5.10", "3ee7d5c17ff9626d72d374d8fc8909bf00f4323fd15549fbe3abbbd38b5299c8", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 2.13 or ~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.0", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.10", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.2", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.1.2 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f9c2eaa5a8fe5a412610c6aa84ccdb6f3e92f333d4df7fbaeb0d5a157dbfb48d"},
"phoenix_ecto": {:hex, :phoenix_ecto, "4.4.0", "0672ed4e4808b3fbed494dded89958e22fb882de47a97634c0b13e7b0b5f7720", [:mix], [{:ecto, "~> 3.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "09864e558ed31ee00bd48fcc1d4fc58ae9678c9e81649075431e69dbabb43cc1"},

View file

@ -0,0 +1,17 @@
defmodule Mobilizon.Storage.Repo.Migrations.AddLanguageToEntities do
  @moduledoc """
  Adds a `language` string column, defaulting to "und", to the `events`,
  `comments` and `posts` tables.
  """
  use Ecto.Migration

  # Tables that receive the new `language` column, in the order they are altered.
  @tables [:events, :comments, :posts]

  def change do
    for table_name <- @tables do
      alter table(table_name) do
        add(:language, :string, default: "und")
      end
    end
  end
end

View file

@ -84,7 +84,8 @@ defmodule Mobilizon.Web.Resolvers.EventTest do
$attributed_to_id: ID,
$online_address: String,
$options: EventOptionsInput,
$draft: Boolean
$draft: Boolean,
$language: String
) {
createEvent(
title: $title,
@ -97,7 +98,8 @@ defmodule Mobilizon.Web.Resolvers.EventTest do
attributed_to_id: $attributed_to_id,
online_address: $online_address,
options: $options,
draft: $draft
draft: $draft,
language: $language
) {
id,
uuid,
@ -113,7 +115,8 @@ defmodule Mobilizon.Web.Resolvers.EventTest do
online_address,
phone_address,
category,
draft,
draft
language
options {
maximumAttendeeCapacity,
showRemainingAttendeeCapacity,
@ -636,6 +639,51 @@ defmodule Mobilizon.Web.Resolvers.EventTest do
assert json_response(res, 200)["data"]["createEvent"]["picture"]["url"]
end
test "create_event/3 creates an event with detected language", %{
  conn: conn,
  actor: %Actor{id: actor_id},
  user: user
} do
  # No `language` variable is provided: the language is expected to be detected
  # from the English title and description.
  variables = %{
    title: "Come to my event",
    description: "This should be long enough to get detected",
    organizer_actor_id: actor_id,
    begins_on: "2021-07-26T09:00:00Z"
  }

  authenticated_conn = auth_conn(conn, user)

  res =
    AbsintheHelpers.graphql_query(authenticated_conn,
      query: @create_event_mutation,
      variables: variables
    )

  assert res["errors"] == nil
  assert res["data"]["createEvent"]["language"] == "en"
end
test "create_event/3 creates an event with manually set language", %{
  conn: conn,
  actor: %Actor{id: actor_id},
  user: user
} do
  # The content is in English but the language is explicitly set to "it":
  # the provided value is expected to be returned unchanged.
  variables = %{
    title: "Come to my event",
    description: "This should be long enough to get detected",
    organizer_actor_id: actor_id,
    begins_on: "2021-07-26T09:00:00Z",
    language: "it"
  }

  authenticated_conn = auth_conn(conn, user)

  res =
    AbsintheHelpers.graphql_query(authenticated_conn,
      query: @create_event_mutation,
      variables: variables
    )

  assert res["errors"] == nil
  assert res["data"]["createEvent"]["language"] == "it"
end
end
describe "create_event/3 on behalf of a group" do