Skip to content

Commit 163bb96

Browse files
authored
Merge pull request #1440 from kenkoooo/staging
言語をなんとかするやつ
2 parents a6bdfad + 5a0a454 commit 163bb96

File tree

5 files changed

+111
-36
lines changed

5 files changed

+111
-36
lines changed

atcoder-problems-backend/sql-client/src/language_count.rs

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use crate::models::{Submission, UserLanguageCount, UserLanguageCountRank, UserPr
22
use crate::{PgPool, MAX_INSERT_ROWS};
33
use anyhow::Result;
44
use async_trait::async_trait;
5-
use regex::Regex;
65
use sqlx::postgres::PgRow;
76
use sqlx::Row;
87
use std::collections::{BTreeMap, BTreeSet};
@@ -192,12 +191,33 @@ impl LanguageCountClient for PgPool {
192191
}
193192
}
194193

194+
/// Prefix overrides consulted by [`simplify_language`] before its generic rule.
///
/// Each entry is `(prefix, simplified name)`: any language label that starts
/// with `prefix` is reported as `simplified name`. Entries are checked in
/// order and the first matching prefix wins. Several entries map a name to
/// itself (e.g. `"Seed7"`, `"Objective-C"`, `"Assembly x64"`) because the
/// generic rule would otherwise cut those names at their digit or hyphen.
const MAPPING: [(&str, &str); 9] = [
    ("PyPy", "Python"),
    ("Python (Cython", "Cython"),
    ("Assembly x64", "Assembly x64"),
    ("Awk", "AWK"),
    ("IOI-Style", "C++"),
    ("LuaJIT", "Lua"),
    ("Seed7", "Seed7"),
    ("Perl6", "Raku"),
    ("Objective-C", "Objective-C"),
];

/// Reduces a full language label such as `"C++ 20 (gcc 12.2)"` to a short
/// language name such as `"C++"`.
///
/// Resolution order:
/// 1. If `lang` starts with a prefix listed in [`MAPPING`], the mapped name is
///    returned.
/// 2. Otherwise everything from the first numeric character, `'('` or `'-'`
///    onwards is dropped and the remainder is trimmed.
/// 3. If nothing but whitespace is left after step 2 (for example `"1234"`),
///    `lang` is returned unchanged so the result is never blank for a
///    non-blank input.
fn simplify_language(lang: &str) -> String {
    for &(prefix, simplified) in MAPPING.iter() {
        if lang.starts_with(prefix) {
            return simplified.to_string();
        }
    }

    // `find` yields the byte offset of a char boundary, so slicing is safe.
    let cut = lang
        .find(|c: char| c.is_numeric() || c == '(' || c == '-')
        .unwrap_or(lang.len());

    // Trim *before* testing for emptiness: a head made only of whitespace must
    // take the fallback path instead of collapsing to an empty name.
    let head = lang[..cut].trim();
    if head.is_empty() {
        lang.to_string()
    } else {
        head.to_string()
    }
}
203223

@@ -207,12 +227,31 @@ mod tests {
207227

208228
/// Table-driven check of `simplify_language`: every raw label must come back
/// as the simplified name listed next to it.
#[test]
fn test_simplify_language() {
    let general_cases = [
        ("language1", "language"),
        ("Perl (5)", "Perl"),
        ("Perl6", "Raku"),
        ("Fortran(GNU Fortran 9.2.1)", "Fortran"),
        ("Ada2012 (GNAT 9.2.1)", "Ada"),
        ("Haxe (4.0.3); js", "Haxe"),
        ("C++11 (Clang++ 3.4)", "C++"),
        ("C++ 20 (gcc 12.2)", "C++"),
        ("C# 11.0 (.NET 7.0.7)", "C#"),
        ("C# 11.0 AOT (.NET 7.0.7)", "C#"),
        ("Visual Basic 16.9 (...)", "Visual Basic"),
        ("><> (fishr 0.1.0)", "><>"),
        ("プロデル (...)", "プロデル"),
    ];

    // mapped individually
    let mapped_cases = [
        ("Assembly x64", "Assembly x64"),
        ("Awk (GNU Awk 4.1.4)", "AWK"),
        ("IOI-Style C++ (GCC 5.4.1)", "C++"),
        ("LuaJIT (2.0.4)", "Lua"),
        ("Objective-C (Clang3.8.0)", "Objective-C"),
        ("PyPy2 (7.3.0)", "Python"),
        ("Python (Cython 0.29.34)", "Cython"),
        ("Cython (0.29.16)", "Cython"),
        ("Seed7 (Seed7 3.2.1)", "Seed7"),
    ];

    for &(raw, expected) in general_cases.iter().chain(mapped_cases.iter()) {
        assert_eq!(simplify_language(raw), expected);
    }

    // A label made only of digits is expected back unchanged.
    assert_eq!(simplify_language("1234"), "1234");
}
218257
}

atcoder-problems-backend/sql-client/tests/test_language_count.rs

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,35 +11,35 @@ async fn test_language_count() {
1111
id: 1,
1212
problem_id: "problem1".to_owned(),
1313
user_id: "user1".to_owned(),
14-
language: "language1".to_owned(),
14+
language: "language x".to_owned(),
1515
..Default::default()
1616
},
1717
Submission {
1818
id: 2,
1919
problem_id: "problem2".to_owned(),
2020
user_id: "user1".to_owned(),
21-
language: "language1".to_owned(),
21+
language: "language x".to_owned(),
2222
..Default::default()
2323
},
2424
Submission {
2525
id: 3,
2626
problem_id: "problem1".to_owned(),
2727
user_id: "user1".to_owned(),
28-
language: "language1".to_owned(),
28+
language: "language x".to_owned(),
2929
..Default::default()
3030
},
3131
Submission {
3232
id: 4,
3333
problem_id: "problem1".to_owned(),
3434
user_id: "user1".to_owned(),
35-
language: "language2".to_owned(),
35+
language: "language y".to_owned(),
3636
..Default::default()
3737
},
3838
Submission {
3939
id: 5,
4040
problem_id: "problem1".to_owned(),
4141
user_id: "user2".to_owned(),
42-
language: "language1".to_owned(),
42+
language: "language x".to_owned(),
4343
..Default::default()
4444
},
4545
Submission {
@@ -65,17 +65,17 @@ async fn test_language_count() {
6565
vec![
6666
UserLanguageCount {
6767
user_id: "user1".to_owned(),
68-
simplified_language: "language1".to_owned(),
68+
simplified_language: "language x".to_owned(),
6969
problem_count: 2
7070
},
7171
UserLanguageCount {
7272
user_id: "user1".to_owned(),
73-
simplified_language: "language2".to_owned(),
73+
simplified_language: "language y".to_owned(),
7474
problem_count: 1
7575
},
7676
UserLanguageCount {
7777
user_id: "user2".to_owned(),
78-
simplified_language: "language1".to_owned(),
78+
simplified_language: "language x".to_owned(),
7979
problem_count: 1
8080
},
8181
UserLanguageCount {
@@ -106,17 +106,17 @@ async fn test_language_count() {
106106
vec![
107107
UserLanguageCount {
108108
user_id: "user1".to_owned(),
109-
simplified_language: "language1".to_owned(),
109+
simplified_language: "language x".to_owned(),
110110
problem_count: 2
111111
},
112112
UserLanguageCount {
113113
user_id: "user1".to_owned(),
114-
simplified_language: "language2".to_owned(),
114+
simplified_language: "language y".to_owned(),
115115
problem_count: 1
116116
},
117117
UserLanguageCount {
118118
user_id: "user2".to_owned(),
119-
simplified_language: "language1".to_owned(),
119+
simplified_language: "language x".to_owned(),
120120
problem_count: 1
121121
},
122122
UserLanguageCount {
@@ -133,7 +133,7 @@ async fn test_language_count() {
133133
);
134134

135135
let language_count_1st_to_2nd = pool
136-
.load_language_count_in_range("language1", 0..2)
136+
.load_language_count_in_range("language x", 0..2)
137137
.await
138138
.unwrap();
139139
assert_eq!(
@@ -151,7 +151,7 @@ async fn test_language_count() {
151151
);
152152

153153
let language_count_2nd_to_2nd = pool
154-
.load_language_count_in_range("language1", 1..2)
154+
.load_language_count_in_range("language x", 1..2)
155155
.await
156156
.unwrap();
157157
assert_eq!(
@@ -163,7 +163,7 @@ async fn test_language_count() {
163163
);
164164

165165
assert_eq!(
166-
pool.load_language_count_in_range("language1", 0..10)
166+
pool.load_language_count_in_range("language x", 0..10)
167167
.await
168168
.unwrap()
169169
.len(),
@@ -176,12 +176,12 @@ async fn test_language_count() {
176176
vec![
177177
UserLanguageCount {
178178
user_id: "user1".to_owned(),
179-
simplified_language: "language1".to_owned(),
179+
simplified_language: "language x".to_owned(),
180180
problem_count: 2,
181181
},
182182
UserLanguageCount {
183183
user_id: "user1".to_owned(),
184-
simplified_language: "language2".to_owned(),
184+
simplified_language: "language y".to_owned(),
185185
problem_count: 1,
186186
},
187187
]
@@ -192,12 +192,12 @@ async fn test_language_count() {
192192
vec![
193193
UserLanguageCountRank {
194194
user_id: "user1".to_owned(),
195-
simplified_language: "language1".to_owned(),
195+
simplified_language: "language x".to_owned(),
196196
rank: 1,
197197
},
198198
UserLanguageCountRank {
199199
user_id: "user1".to_owned(),
200-
simplified_language: "language2".to_owned(),
200+
simplified_language: "language y".to_owned(),
201201
rank: 1,
202202
},
203203
]
@@ -208,7 +208,7 @@ async fn test_language_count() {
208208
language_count,
209209
vec![UserLanguageCount {
210210
user_id: "user2".to_owned(),
211-
simplified_language: "language1".to_owned(),
211+
simplified_language: "language x".to_owned(),
212212
problem_count: 1,
213213
},]
214214
);
@@ -217,7 +217,7 @@ async fn test_language_count() {
217217
language_count_rank,
218218
vec![UserLanguageCountRank {
219219
user_id: "user2".to_owned(),
220-
simplified_language: "language1".to_owned(),
220+
simplified_language: "language x".to_owned(),
221221
rank: 2,
222222
},]
223223
);
@@ -255,5 +255,5 @@ async fn test_language_count() {
255255
]
256256
);
257257
let languages = pool.load_languages().await.unwrap();
258-
assert_eq!(languages, ["language1", "language2", "Perl", "Raku"]);
258+
assert_eq!(languages, ["language x", "language y", "Perl", "Raku"]);
259259
}

atcoder-problems-frontend/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
"format": "yarn run sort-package-json && yarn run prettier && yarn run lint:fix",
1212
"lint": "eslint --ignore-path .gitignore \"./src/**/*.{js,jsx,ts,tsx}\"",
1313
"lint:fix": "yarn run lint --fix",
14-
"md:serve": "mdbook serve ../guide/",
15-
"md:clean": "mdbook clean ../guide/ -d=../atcoder-problems-frontend/build/book/",
1614
"md:build": "mkdir -p build/book && yarn run md:clean && mdbook build ../guide/ -d=../atcoder-problems-frontend/build/book/",
15+
"md:clean": "mdbook clean ../guide/ -d=../atcoder-problems-frontend/build/book/",
16+
"md:serve": "mdbook serve ../guide/",
1717
"prepare-ci": "rm -rf serve && mkdir serve && cp -r build serve/atcoder",
1818
"prettier": "prettier --write --ignore-path .gitignore \"**/*.{css,scss,html,js,json,jsx,md,ts,tsx}\"",
1919
"sort-package-json": "sort-package-json",

atcoder-problems-frontend/src/utils/LanguageNormalizer.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,26 @@ test("normalize language", () => {
77
expect(normalizeLanguage("C++11 (Clang++ 3.4)")).toBe("C++");
88
expect(normalizeLanguage("Scala (2.11.7)")).toBe("Scala");
99
expect(normalizeLanguage("Fortran(GNU Fortran 9.2.1)")).toBe("Fortran");
10+
expect(normalizeLanguage("Ada2012 (GNAT 9.2.1)")).toBe("Ada");
11+
expect(normalizeLanguage("Haxe (4.0.3); js")).toBe("Haxe");
12+
expect(normalizeLanguage("C++11 (Clang++ 3.4)")).toBe("C++");
13+
expect(normalizeLanguage("C++ 20 (gcc 12.2)")).toBe("C++");
14+
expect(normalizeLanguage("C# 11.0 (.NET 7.0.7)")).toBe("C#");
15+
expect(normalizeLanguage("C# 11.0 AOT (.NET 7.0.7)")).toBe("C#");
16+
expect(normalizeLanguage("Visual Basic 16.9 (...)")).toBe("Visual Basic");
17+
expect(normalizeLanguage("><> (fishr 0.1.0)")).toBe("><>");
18+
expect(normalizeLanguage("プロデル (...)")).toBe("プロデル");
19+
20+
// mapped individually
21+
expect(normalizeLanguage("Assembly x64")).toBe("Assembly x64");
22+
expect(normalizeLanguage("Awk (GNU Awk 4.1.4)")).toBe("AWK");
23+
expect(normalizeLanguage("IOI-Style C++ (GCC 5.4.1)")).toBe("C++");
24+
expect(normalizeLanguage("LuaJIT (2.0.4)")).toBe("Lua");
25+
expect(normalizeLanguage("Objective-C (Clang3.8.0)")).toBe("Objective-C");
26+
expect(normalizeLanguage("PyPy2 (7.3.0)")).toBe("Python");
27+
expect(normalizeLanguage("Python (Cython 0.29.34)")).toBe("Cython");
28+
expect(normalizeLanguage("Cython (0.29.16)")).toBe("Cython");
29+
expect(normalizeLanguage("Seed7 (Seed7 3.2.1)")).toBe("Seed7");
30+
31+
expect(normalizeLanguage("1234")).toBe("Unknown");
1032
});
Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
1+
// Prefix overrides checked before the generic rule in `normalizeLanguage`.
// Each entry is [prefix, normalized name]; the first prefix that the label
// starts with wins. Some names map to themselves (e.g. "Seed7",
// "Objective-C") because the generic rule would cut them at a digit or hyphen.
const mapping: [string, string][] = [
  ["PyPy", "Python"],
  ["Python (Cython", "Cython"],
  ["Assembly x64", "Assembly x64"],
  ["Awk", "AWK"],
  ["IOI-Style", "C++"],
  ["LuaJIT", "Lua"],
  ["Seed7", "Seed7"],
  ["Perl6", "Raku"],
  ["Objective-C", "Objective-C"],
];

/**
 * Reduces a full language label such as "C++ 20 (gcc 12.2)" to a short
 * language name such as "C++".
 *
 * A label that starts with one of the prefixes in `mapping` returns the mapped
 * name. Otherwise everything from the first digit, "(" or "-" onwards is
 * dropped and the rest is trimmed. If nothing is left, "Unknown" is returned.
 *
 * @param language raw language label
 * @returns the normalized language name, or "Unknown" when none can be derived
 */
export const normalizeLanguage = (language: string): string => {
  const override = mapping.find(([beginning]) =>
    language.startsWith(beginning)
  );
  if (override) {
    return override[1];
  }

  // Trim after cutting so whitespace-only or padded labels cannot slip through
  // as a "name"; an empty result falls back to "Unknown".
  const head = language.replace(/[\d(-].*/, "").trim();
  return head || "Unknown";
};

0 commit comments

Comments
 (0)