Skip to content

Commit 75dbe9c

Browse files
authored
Add lingua plugin (#6)
* Add lingua plugin * enable only common languages * updates * ci fix * update * update lemmy bin * fixes * const * add more plugin hooks * readme
1 parent d61e7e6 commit 75dbe9c

File tree

9 files changed

+167
-25
lines changed

9 files changed

+167
-25
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ tests/log
1212
# plugins
1313
plugins/rust_allowed_voters/target
1414
plugins/rust_allowed_voters/Cargo.lock
15+
plugins/rust_lingua/target
16+
plugins/rust_lingua/Cargo.lock
1517
plugins/typescript_push_webhook/dist
1618
plugins/typescript_push_webhook/node_modules
1719

.woodpecker.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,18 @@ steps:
2626
when:
2727
- event: [pull_request, tag]
2828

29-
build_rust_plugin:
30-
image: rust:1.90
29+
build_rust_plugins:
30+
image: rust:1.91
3131
commands:
3232
- rustup target add wasm32-unknown-unknown
3333
- cd plugins/rust_allowed_voters
3434
- cargo build
3535
- cp target/wasm32-unknown-unknown/debug/rust_allowed_voters.wasm ..
36+
- cd -
37+
- cd plugins/rust_lingua
38+
- cargo build
39+
- cp target/wasm32-unknown-unknown/debug/rust_lingua.wasm ..
40+
- cd -
3641
when:
3742
- event: [pull_request, tag]
3843

@@ -77,6 +82,7 @@ steps:
7782
settings:
7883
files:
7984
- plugins/rust_allowed_voters.wasm
85+
- plugins/rust_lingua.wasm
8086
- plugins/go_replace_words.wasm
8187
- plugins/typescript_push_webhook.wasm
8288
title: ${CI_COMMIT_TAG##v}

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ cargo build
4242
cp target/wasm32-unknown-unknown/debug/rust_allowed_voters.wasm ..
4343
```
4444

45+
## Rust: Lingua
46+
47+
Automatic language tagging for new posts and comments using [Lingua](https://github.com/pemistahl/lingua-rs). Compilation steps are the same as above.
48+
4549
## Tests
4650

4751
This repository contains test cases for the plugins. To run them, install `pnpm` and `postgresql`, and create a database reachable at `postgres://lemmy:password@localhost:5432/lemmy`. Then compile all the plugins as described above, go into the `tests` folder, and execute `./run.sh`.

lemmy_server

-8.83 MB
Binary file not shown.

plugins/rust_allowed_voters/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "rust_allowed_voters"
33
version = "0.1.0"
4-
edition = "2021"
4+
edition = "2024"
55

66
[lib]
77
crate-type = ["cdylib"]

plugins/rust_allowed_voters/src/lib.rs

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,31 @@
1-
use crate::json::Value;
21
use extism_pdk::*;
32
use lemmy_api_common::person::GetPersonDetailsResponse;
4-
use serde::Serialize;
5-
use std::collections::HashMap;
6-
7-
#[derive(Serialize)]
8-
struct Metadata {
9-
name: String,
10-
url: String,
11-
description: String,
12-
}
3+
use lemmy_api_common::plugin::PluginMetadata;
4+
use lemmy_api_common::post::PostLikeForm;
135

146
// Returns info about the plugin which gets included in /api/v4/site
157
//go:wasmexport metadata
168
#[plugin_fn]
17-
pub fn metadata() -> FnResult<Json<Metadata>> {
18-
Ok(Json(Metadata {
19-
name: "Allowed Voters".to_string(),
20-
url: "https://example.com".to_string(),
21-
description: "Plugin to test Lemmy feature".to_string(),
22-
}))
9+
pub fn metadata() -> FnResult<Json<PluginMetadata>> {
10+
Ok(Json(PluginMetadata::new(
11+
"Allowed Voters",
12+
"https://github.com/LemmyNet/lemmy-plugins/",
13+
"Prevent users with few posts from voting",
14+
)))
2315
}
2416

2517
#[plugin_fn]
26-
pub fn post_before_vote(
27-
Json(vote): Json<HashMap<String, Value>>,
28-
) -> FnResult<Json<HashMap<String, Value>>> {
18+
pub fn post_before_vote(Json(vote): Json<PostLikeForm>) -> FnResult<Json<PostLikeForm>> {
2919
let lemmy_url = config::get("lemmy_url")?.unwrap();
30-
let person_id = vote.get("person_id").unwrap();
20+
let person_id = vote.person_id.0;
3121
let req = HttpRequest {
3222
url: format!("{lemmy_url}api/v4/person?person_id={person_id}"),
3323
headers: Default::default(),
3424
method: Some("GET".to_string()),
3525
};
3626
let res: GetPersonDetailsResponse = http::request::<()>(&req, None)?.json()?;
3727
let person_post_count = res.person_view.person.post_count;
38-
info!("{:?}", vote);
39-
let is_upvote = vote.get("vote_is_upvote").and_then(Value::as_bool).unwrap();
28+
let is_upvote = vote.vote_is_upvote;
4029
if person_post_count < 5 && !is_upvote {
4130
return Err(Error::msg("user is not allowed to downvote").into());
4231
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[build]
2+
target = "wasm32-unknown-unknown"

plugins/rust_lingua/Cargo.toml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[package]
2+
name = "rust_lingua"
3+
version = "0.1.0"
4+
edition = "2024"
5+
6+
[lib]
7+
crate-type = ["cdylib"]
8+
9+
[dependencies]
10+
# Enable only the languages which are most common on Lemmy
11+
# `select l from language l left join post p on l.id = p.language_id group by l.id order by count(p.id) desc;`
12+
lingua = { git = "https://github.com/Nutomic/lingua-rs", branch = "wasm-bindgen-feature", default-features = false, features = [
13+
"english",
14+
"dutch",
15+
"german",
16+
"french",
17+
"spanish",
18+
"polish",
19+
"danish",
20+
"portuguese",
21+
"italian",
22+
"swedish",
23+
"finnish",
24+
] }
25+
extism-pdk = "1.3.0"
26+
serde = { version = "1", features = ["derive"] }
27+
lemmy_api_common = { git = "https://github.com/LemmyNet/lemmy.git" }

plugins/rust_lingua/src/lib.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
use std::cell::LazyCell;
2+
3+
use extism_pdk::FnResult;
4+
use extism_pdk::FromBytes;
5+
use extism_pdk::HttpRequest;
6+
use extism_pdk::Json;
7+
use extism_pdk::ToBytes;
8+
use extism_pdk::config;
9+
use extism_pdk::http;
10+
use extism_pdk::plugin_fn;
11+
use extism_pdk::var;
12+
use lemmy_api_common::comment::CommentInsertForm;
13+
use lemmy_api_common::language::Language as LemmyLanguage;
14+
use lemmy_api_common::language::LanguageId;
15+
use lemmy_api_common::plugin::PluginMetadata;
16+
use lemmy_api_common::post::PostInsertForm;
17+
use lemmy_api_common::site::GetSiteResponse;
18+
use lingua::Language;
19+
use lingua::LanguageDetector;
20+
use lingua::LanguageDetectorBuilder;
21+
use serde::Deserialize;
22+
use serde::Serialize;
23+
24+
// Returns info about the plugin which gets included in /api/v4/site
25+
#[plugin_fn]
26+
pub fn metadata() -> FnResult<Json<PluginMetadata>> {
27+
// initialize the detector because it takes a long time (~5s)
28+
LazyCell::<LanguageDetector>::force(&DETECTOR);
29+
30+
Ok(Json(PluginMetadata::new(
31+
"Lingua",
32+
"https://github.com/LemmyNet/lemmy-plugins/",
33+
"Automatic language tagging for posts and comments",
34+
)))
35+
}
36+
37+
// Usage: https://docs.rs/lingua/1.7.2/lingua/index.html
38+
// There are various optimizations available, which could be exposed as plugin settings
39+
const DETECTOR: LazyCell<LanguageDetector> =
40+
LazyCell::new(|| LanguageDetectorBuilder::from_all_languages().build());
41+
42+
#[plugin_fn]
43+
pub fn local_post_before_create(
44+
Json(mut form): Json<PostInsertForm>,
45+
) -> FnResult<Json<PostInsertForm>> {
46+
let content = format!("{} {}", form.name, form.body.clone().unwrap_or_default());
47+
detect_language(content, &mut form.language_id)?;
48+
Ok(Json(form))
49+
}
50+
51+
#[plugin_fn]
52+
pub fn local_comment_before_create(
53+
Json(mut form): Json<CommentInsertForm>,
54+
) -> FnResult<Json<CommentInsertForm>> {
55+
detect_language(form.content.clone(), &mut form.language_id)?;
56+
Ok(Json(form))
57+
}
58+
59+
#[plugin_fn]
60+
pub fn federated_post_before_receive(
61+
Json(mut form): Json<PostInsertForm>,
62+
) -> FnResult<Json<PostInsertForm>> {
63+
let content = format!("{} {}", form.name, form.body.clone().unwrap_or_default());
64+
detect_language(content, &mut form.language_id)?;
65+
Ok(Json(form))
66+
}
67+
68+
#[plugin_fn]
69+
pub fn federated_comment_before_receive(
70+
Json(mut form): Json<CommentInsertForm>,
71+
) -> FnResult<Json<CommentInsertForm>> {
72+
detect_language(form.content.clone(), &mut form.language_id)?;
73+
Ok(Json(form))
74+
}
75+
76+
fn detect_language(content: String, language_id: &mut Option<LanguageId>) -> FnResult<()> {
77+
if language_id.is_none() {
78+
let detected_language: Option<Language> = DETECTOR.detect_language_of(content);
79+
80+
if let Some(detected_language) = detected_language {
81+
let all_langs = all_languages()?;
82+
let lang = all_langs
83+
.iter()
84+
.find(|l| l.code == detected_language.iso_code_639_1().to_string());
85+
*language_id = lang.map(|l| l.id);
86+
}
87+
}
88+
Ok(())
89+
}
90+
91+
#[derive(Deserialize, Serialize, FromBytes, ToBytes)]
92+
#[encoding(Json)]
93+
struct AllLanguages(Vec<LemmyLanguage>);
94+
95+
fn all_languages() -> FnResult<Vec<LemmyLanguage>> {
96+
const KEY: &str = "all_languages";
97+
let langs = var::get::<AllLanguages>(KEY)?;
98+
if let Some(langs) = langs {
99+
Ok(langs.0)
100+
} else {
101+
let lemmy_url = config::get("lemmy_url")?.unwrap();
102+
let req = HttpRequest {
103+
url: format!("{lemmy_url}api/v4/site"),
104+
headers: Default::default(),
105+
method: Some("GET".to_string()),
106+
};
107+
let site: GetSiteResponse = http::request::<()>(&req, None)?.json()?;
108+
let langs = site.all_languages;
109+
var::set(KEY, AllLanguages(langs.clone()))?;
110+
Ok(langs)
111+
}
112+
}

0 commit comments

Comments
 (0)