Skip to content

Commit bfcd349

Browse files
authored
Merge pull request #1438 from roumcha/fix-language-name-normalization
Fix language name normalization
2 parents dc92d7b + 09b6617 commit bfcd349

File tree

3 files changed

+88
-12
lines changed

3 files changed

+88
-12
lines changed

atcoder-problems-backend/sql-client/src/language_count.rs

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,33 @@ impl LanguageCountClient for PgPool {
192192
}
193193
}
194194

195+
const MAPPING: [(&str, &str); 9] = [
196+
("PyPy", "Python"),
197+
("Python (Cython", "Cython"),
198+
("Assembly x64", "Assembly x64"),
199+
("Awk", "AWK"),
200+
("IOI-Style", "C++"),
201+
("LuaJIT", "Lua"),
202+
("Seed7", "Seed7"),
203+
("Perl6", "Raku"),
204+
("Objective-C", "Objective-C"),
205+
];
206+
195207
fn simplify_language(lang: &str) -> String {
196-
let re = Regex::new(r"\d*\s*\(.*\)").unwrap();
197-
if lang.starts_with("Perl6") {
198-
"Raku".to_string()
199-
} else {
200-
re.replace(lang, "").to_string()
208+
for (beginning, simplified) in MAPPING {
209+
if lang.starts_with(beginning) {
210+
return simplified.to_string();
211+
}
212+
}
213+
214+
let simplified = Regex::new(r"\s*[\d(\-].*")
215+
.unwrap()
216+
.replace(lang, "")
217+
.to_string();
218+
219+
match simplified.len() {
220+
0 => String::from("Unknown"),
221+
_ => simplified,
201222
}
202223
}
203224

@@ -207,12 +228,31 @@ mod tests {
207228

208229
#[test]
209230
fn test_simplify_language() {
210-
assert_eq!(simplify_language("language1"), "language1");
231+
assert_eq!(simplify_language("language1"), "language");
211232
assert_eq!(simplify_language("Perl (5)"), "Perl");
212233
assert_eq!(simplify_language("Perl6"), "Raku");
213234
assert_eq!(simplify_language("Fortran(GNU Fortran 9.2.1)"), "Fortran");
214235
assert_eq!(simplify_language("Ada2012 (GNAT 9.2.1)"), "Ada");
215-
assert_eq!(simplify_language("PyPy2 (7.3.0)"), "PyPy");
216-
assert_eq!(simplify_language("Haxe (4.0.3); js"), "Haxe; js");
236+
assert_eq!(simplify_language("Haxe (4.0.3); js"), "Haxe");
237+
assert_eq!(simplify_language("C++11 (Clang++ 3.4)"), "C++");
238+
assert_eq!(simplify_language("C++ 20 (gcc 12.2)"), "C++");
239+
assert_eq!(simplify_language("C# 11.0 (.NET 7.0.7)"), "C#");
240+
assert_eq!(simplify_language("C# 11.0 AOT (.NET 7.0.7)"), "C#");
241+
assert_eq!(simplify_language("Visual Basic 16.9 (...)"), "Visual Basic");
242+
assert_eq!(simplify_language("><> (fishr 0.1.0)"), "><>");
243+
assert_eq!(simplify_language("プロデル (...)"), "プロデル");
244+
245+
// mapped individually
246+
assert_eq!(simplify_language("Assembly x64"), "Assembly x64");
247+
assert_eq!(simplify_language("Awk (GNU Awk 4.1.4)"), "AWK");
248+
assert_eq!(simplify_language("IOI-Style C++ (GCC 5.4.1)"), "C++");
249+
assert_eq!(simplify_language("LuaJIT (2.0.4)"), "Lua");
250+
assert_eq!(simplify_language("Objective-C (Clang3.8.0)"), "Objective-C");
251+
assert_eq!(simplify_language("PyPy2 (7.3.0)"), "Python");
252+
assert_eq!(simplify_language("Python (Cython 0.29.34)"), "Cython");
253+
assert_eq!(simplify_language("Cython (0.29.16)"), "Cython");
254+
assert_eq!(simplify_language("Seed7 (Seed7 3.2.1)"), "Seed7");
255+
256+
assert_eq!(simplify_language("1234"), "Unknown");
217257
}
218258
}

atcoder-problems-frontend/src/utils/LanguageNormalizer.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,26 @@ test("normalize language", () => {
77
expect(normalizeLanguage("C++11 (Clang++ 3.4)")).toBe("C++");
88
expect(normalizeLanguage("Scala (2.11.7)")).toBe("Scala");
99
expect(normalizeLanguage("Fortran(GNU Fortran 9.2.1)")).toBe("Fortran");
10+
expect(normalizeLanguage("Ada2012 (GNAT 9.2.1)")).toBe("Ada");
11+
expect(normalizeLanguage("Haxe (4.0.3); js")).toBe("Haxe");
12+
expect(normalizeLanguage("C++11 (Clang++ 3.4)")).toBe("C++");
13+
expect(normalizeLanguage("C++ 20 (gcc 12.2)")).toBe("C++");
14+
expect(normalizeLanguage("C# 11.0 (.NET 7.0.7)")).toBe("C#");
15+
expect(normalizeLanguage("C# 11.0 AOT (.NET 7.0.7)")).toBe("C#");
16+
expect(normalizeLanguage("Visual Basic 16.9 (...)")).toBe("Visual Basic");
17+
expect(normalizeLanguage("><> (fishr 0.1.0)")).toBe("><>");
18+
expect(normalizeLanguage("プロデル (...)")).toBe("プロデル");
19+
20+
// mapped individually
21+
expect(normalizeLanguage("Assembly x64")).toBe("Assembly x64");
22+
expect(normalizeLanguage("Awk (GNU Awk 4.1.4)")).toBe("AWK");
23+
expect(normalizeLanguage("IOI-Style C++ (GCC 5.4.1)")).toBe("C++");
24+
expect(normalizeLanguage("LuaJIT (2.0.4)")).toBe("Lua");
25+
expect(normalizeLanguage("Objective-C (Clang3.8.0)")).toBe("Objective-C");
26+
expect(normalizeLanguage("PyPy2 (7.3.0)")).toBe("Python");
27+
expect(normalizeLanguage("Python (Cython 0.29.34)")).toBe("Cython");
28+
expect(normalizeLanguage("Cython (0.29.16)")).toBe("Cython");
29+
expect(normalizeLanguage("Seed7 (Seed7 3.2.1)")).toBe("Seed7");
30+
31+
expect(normalizeLanguage("1234")).toBe("Unknown");
1032
});
atcoder-problems-frontend/src/utils/LanguageNormalizer.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
1+
const mapping: [beginning: string, normalized: string][] = [
2+
["PyPy", "Python"],
3+
["Python (Cython", "Cython"],
4+
["Assembly x64", "Assembly x64"],
5+
["Awk", "AWK"],
6+
["IOI-Style", "C++"],
7+
["LuaJIT", "Lua"],
8+
["Seed7", "Seed7"],
9+
["Perl6", "Raku"],
10+
["Objective-C", "Objective-C"],
11+
];
12+
113
export const normalizeLanguage = (language: string): string => {
2-
if (language.startsWith("Perl6")) {
3-
return "Raku";
4-
} else {
5-
return language.replace(/\d*\s*\(.*\)$/, "");
14+
for (const [beginning, normalized] of mapping) {
15+
if (language.startsWith(beginning)) {
16+
return normalized;
17+
}
618
}
19+
20+
return language.replace(/\s*[\d(\-].*/, "") || "Unknown";
721
};

0 commit comments

Comments
 (0)