Skip to content

Commit 5a257a9

Browse files
committed
Merge pull request #112073 from timothyqiu/make-csv-great-again
Improve CSV translations
2 parents ae8c929 + c1ee8e5 commit 5a257a9

File tree

5 files changed

+210
-57
lines changed

5 files changed

+210
-57
lines changed

core/string/optimized_translation.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,24 @@ StringName OptimizedTranslation::get_plural_message(const StringName &p_src_text
314314
return get_message(p_src_text, p_context);
315315
}
316316

317+
// Override that reports the source message texts as unavailable: it emits a
// warning through WARN_PRINT_ONCE and returns an empty vector.
Vector<String> OptimizedTranslation::_get_message_list() const {
318+
WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated.");
319+
return {};
320+
}
321+
322+
// Override that only emits a warning through WARN_PRINT_ONCE.
// `r_messages` is never written to, so the caller's list is left unchanged.
void OptimizedTranslation::get_message_list(List<StringName> *r_messages) const {
323+
WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated.");
324+
}
325+
326+
// Override that emits a warning through WARN_PRINT_ONCE and always returns 0,
// matching the empty results of the message-list getters in this file.
int OptimizedTranslation::get_message_count() const {
327+
WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated.");
328+
return 0;
329+
}
330+
317331
// Appends this class's properties to `p_list`: the three data tables
// ("hash_table", "bucket_table", "strings") and the "load_from" Translation reference.
// In this diff, the removed (`-`) lines registered the three tables without an explicit
// usage argument; the added (`+`) lines register them with PROPERTY_USAGE_NO_EDITOR.
// "load_from" is registered with PROPERTY_USAGE_EDITOR in both versions.
void OptimizedTranslation::_get_property_list(List<PropertyInfo> *p_list) const {
318-
p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table"));
319-
p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table"));
320-
p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings"));
332+
p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR));
333+
p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR));
334+
p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR));
321335
p_list->push_back(PropertyInfo(Variant::OBJECT, "load_from", PROPERTY_HINT_RESOURCE_TYPE, "Translation", PROPERTY_USAGE_EDITOR));
322336
}
323337

core/string/optimized_translation.h

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,35 @@
3535
class OptimizedTranslation : public Translation {
3636
GDCLASS(OptimizedTranslation, Translation);
3737

38-
//this translation uses a sort of modified perfect hash algorithm
39-
//it requires hashing strings twice and then does a binary search,
40-
//so it's slower, but at the same time it has an extremely high chance
41-
//of catching untranslated strings
38+
// This translation uses a sort of modified perfect hash algorithm.
39+
// It requires hashing strings twice and then does a binary search,
40+
// so it's slower, but at the same time it has an extremely high chance
41+
// of catching untranslated strings.
4242

43-
//load/store friendly types
43+
// `hash_table[hash(0, text)]` produces a `bucket_table` index or 0xFFFFFFFF if not found.
4444
Vector<int> hash_table;
45+
46+
// Contiguous `Bucket`s in a flat layout.
4547
Vector<int> bucket_table;
48+
49+
// Data for translated strings, UTF-8 encoded, either compressed or uncompressed.
4650
Vector<uint8_t> strings;
4751

4852
struct Bucket {
53+
// Number of `Elem` objects at `elem`.
4954
int size;
55+
56+
// Use `hash(func, text)` to generate the unique `Elem::key` in this bucket.
5057
uint32_t func;
5158

5259
struct Elem {
60+
// Unique key for the text.
5361
uint32_t key;
62+
63+
// Used to index into `strings`.
5464
uint32_t str_offset;
65+
66+
// The string is not compressed if `comp_size` equals `uncomp_size`.
5567
uint32_t comp_size;
5668
uint32_t uncomp_size;
5769
};
@@ -71,6 +83,8 @@ class OptimizedTranslation : public Translation {
7183
return d;
7284
}
7385

86+
virtual Vector<String> _get_message_list() const override;
87+
7488
protected:
7589
bool _set(const StringName &p_name, const Variant &p_value);
7690
bool _get(const StringName &p_name, Variant &r_ret) const;
@@ -83,5 +97,8 @@ class OptimizedTranslation : public Translation {
8397
virtual Vector<String> get_translated_message_list() const override;
8498
void generate(const Ref<Translation> &p_from);
8599

100+
virtual void get_message_list(List<StringName> *r_messages) const override;
101+
virtual int get_message_count() const override;
102+
86103
OptimizedTranslation() {}
87104
};

doc/classes/OptimizedTranslation.xml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
<?xml version="1.0" encoding="UTF-8" ?>
22
<class name="OptimizedTranslation" inherits="Translation" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
33
<brief_description>
4-
An optimized translation, used by default for CSV Translations.
4+
An optimized translation.
55
</brief_description>
66
<description>
7-
An optimized translation, used by default for CSV Translations. Uses real-time compressed translations, which results in very small dictionaries.
7+
An optimized translation. Uses real-time compressed translations, which results in very small dictionaries.
8+
For optimization purposes, this class does not store the untranslated strings. Therefore, [method Translation.get_message_list] always returns an empty array, and [method Translation.get_message_count] always returns [code]0[/code].
89
</description>
910
<tutorials>
1011
</tutorials>
@@ -14,6 +15,7 @@
1415
<param index="0" name="from" type="Translation" />
1516
<description>
1617
Generates and sets an optimized translation from the given [Translation] resource.
18+
[b]Note:[/b] Messages in [param from] should not use context or plural forms.
1719
[b]Note:[/b] This method is intended to be used in the editor. It does nothing when called from an exported project.
1820
</description>
1921
</method>
Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
<?xml version="1.0" encoding="UTF-8" ?>
22
<class name="ResourceImporterCSVTranslation" inherits="ResourceImporter" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
33
<brief_description>
4-
Imports comma-separated values
4+
Imports comma-separated values as [Translation]s.
55
</brief_description>
66
<description>
77
Comma-separated values are a plain text table storage format. The format's simplicity makes it easy to edit in any text editor or spreadsheet software. This makes it a common choice for game localization.
8+
In the CSV file used for translation, the first column contains string identifiers, and the first row serves as the header. The first column's header can be any value. The remaining headers indicate the locale for that column. Columns whose headers begin with an underscore ([code]_[/code]) will be ignored.
89
[b]Example CSV file:[/b]
910
[codeblock lang=text]
1011
keys,en,es,ja
@@ -13,16 +14,38 @@
1314
BYE,Goodbye,Adiós,さようなら
1415
QUOTE,"""Hello"" said the man.","""Hola"" dijo el hombre.",「こんにちは」男は言いました
1516
[/codeblock]
17+
Although keys in the first column typically use uppercase string identifiers, it is not uncommon to directly use strings appearing in the game as keys. To avoid string ambiguity, you can use a special [code]?context[/code] column to specify the context to use with [method Object.tr].
18+
[codeblock lang=text]
19+
en,?context,fr,ja,zh
20+
Letter,Alphabet,Lettre,字母,字母
21+
Letter,Message,Courrier,手紙,信件
22+
[/codeblock]
23+
To set the plural form of a string to use with [method Object.tr_n], add a special [code]?plural[/code] column. After setting the plural form of the source string in this column, you can add additional rows to provide translations for more plural forms. The first column and all special columns in these plural form rows must be empty.
24+
Godot includes built-in plural rules for some languages. You can also customize them using a special [code]?pluralrule[/code] row. See [url=https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html]GNU gettext[/url] for examples and more info.
25+
[codeblock lang=text]
26+
en,?plural,fr,ru,zh,_Comment
27+
?pluralrule,,nplurals=2; plural=(n &gt;= 2);,,,Customize the plural rule for French
28+
There is %d apple,There are %d apples,Il y a %d pomme,Есть %d яблоко,那里有%d个苹果,
29+
,,Il y a %d pommes,Есть %d яблока,,
30+
,,,Есть %d яблок,,
31+
[/codeblock]
1632
</description>
1733
<tutorials>
1834
<link title="Importing translations">$DOCS_URL/tutorials/assets_pipeline/importing_translations.html</link>
1935
</tutorials>
2036
<members>
21-
<member name="compress" type="bool" setter="" getter="" default="true">
22-
If [code]true[/code], creates an [OptimizedTranslation] instead of a [Translation]. This makes the resulting file smaller at the cost of a small CPU overhead.
37+
<member name="compress" type="int" setter="" getter="" default="1">
38+
- [b]Disabled[/b]: Creates a [Translation].
39+
- [b]Auto[/b]: Creates an [OptimizedTranslation] when possible. This makes the resulting file smaller at the cost of a small CPU overhead. Falls back to [Translation] for translations with context or plural forms.
2340
</member>
2441
<member name="delimiter" type="int" setter="" getter="" default="0">
2542
The delimiter to use in the CSV file. The default value matches the common CSV convention. Tab-separated values are sometimes called TSV files.
2643
</member>
44+
<member name="unescape_keys" type="bool" setter="" getter="" default="false">
45+
If [code]true[/code], message keys in the CSV file are unescaped using [method String.c_unescape] during the import process.
46+
</member>
47+
<member name="unescape_translations" type="bool" setter="" getter="" default="true">
48+
If [code]true[/code], message translations in the CSV file are unescaped using [method String.c_unescape] during the import process.
49+
</member>
2750
</members>
2851
</class>

editor/import/resource_importer_csv_translation.cpp

Lines changed: 141 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -68,81 +68,178 @@ String ResourceImporterCSVTranslation::get_preset_name(int p_idx) const {
6868
}
6969

7070
// Appends the CSV translation importer's options to `r_options`:
//   - "compress": in the added (`+`) line, an INT enum "Disabled,Auto" with default 1;
//     the removed (`-`) line declared it as a BOOL with default true.
//   - "delimiter": INT enum "Comma,Semicolon,Tab", default 0.
//   - "unescape_keys": BOOL, default false.
//   - "unescape_translations": BOOL, default true.
// `p_path` and `p_preset` are not referenced by the lines shown here.
void ResourceImporterCSVTranslation::get_import_options(const String &p_path, List<ImportOption> *r_options, int p_preset) const {
71-
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "compress"), true));
71+
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "compress", PROPERTY_HINT_ENUM, "Disabled,Auto"), 1)); // Enum for compatibility with previous versions.
7272
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "delimiter", PROPERTY_HINT_ENUM, "Comma,Semicolon,Tab"), 0));
73+
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "unescape_keys"), false));
74+
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "unescape_translations"), true));
7375
}
7476

7577
Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
76-
bool compress = p_options["compress"];
78+
Ref<FileAccess> f = FileAccess::open(p_source_file, FileAccess::READ);
79+
ERR_FAIL_COND_V_MSG(f.is_null(), ERR_INVALID_PARAMETER, "Cannot open file from path '" + p_source_file + "'.");
7780

7881
String delimiter;
7982
switch ((int)p_options["delimiter"]) {
80-
case 0:
81-
delimiter = ",";
82-
break;
83-
case 1:
83+
case 1: {
8484
delimiter = ";";
85-
break;
86-
case 2:
85+
} break;
86+
case 2: {
8787
delimiter = "\t";
88-
break;
88+
} break;
89+
default: {
90+
delimiter = ",";
91+
} break;
8992
}
9093

91-
Ref<FileAccess> f = FileAccess::open(p_source_file, FileAccess::READ);
92-
ERR_FAIL_COND_V_MSG(f.is_null(), ERR_INVALID_PARAMETER, "Cannot open file from path '" + p_source_file + "'.");
93-
94-
Vector<String> line = f->get_csv_line(delimiter);
95-
ERR_FAIL_COND_V(line.size() <= 1, ERR_PARSE_ERROR);
96-
97-
Vector<String> locales;
98-
Vector<Ref<Translation>> translations;
99-
HashSet<int> skipped_locales;
94+
// Parse the header row.
95+
HashMap<int, Ref<Translation>> column_to_translation;
96+
int context_column = -1;
97+
int plural_column = -1;
98+
{
99+
const Vector<String> line = f->get_csv_line(delimiter);
100+
for (int i = 1; i < line.size(); i++) {
101+
if (line[i].left(1) == "_") {
102+
continue;
103+
}
104+
if (line[i].to_lower() == "?context") {
105+
ERR_CONTINUE_MSG(context_column != -1, "Error importing CSV translation: Multiple '?context' columns found. Only one is allowed. Subsequent ones will be ignored.");
106+
context_column = i;
107+
continue;
108+
}
109+
if (line[i].to_lower() == "?plural") {
110+
ERR_CONTINUE_MSG(plural_column != -1, "Error importing CSV translation: Multiple '?plural' columns found. Only one is allowed. Subsequent ones will be ignored.");
111+
plural_column = i;
112+
continue;
113+
}
100114

101-
for (int i = 1; i < line.size(); i++) {
102-
String locale = TranslationServer::get_singleton()->standardize_locale(line[i]);
115+
const String locale = TranslationServer::get_singleton()->standardize_locale(line[i]);
116+
ERR_CONTINUE_MSG(locale.is_empty(), vformat("Error importing CSV translation: Invalid locale format '%s', should be 'language_Script_COUNTRY_VARIANT@extra'. This column will be ignored.", line[i]));
103117

104-
if (line[i].left(1) == "_") {
105-
skipped_locales.insert(i);
106-
continue;
107-
} else if (locale.is_empty()) {
108-
skipped_locales.insert(i);
109-
ERR_CONTINUE_MSG(true, vformat("Error importing CSV translation: Invalid locale format '%s', should be 'language_Script_COUNTRY_VARIANT@extra'. This column will be ignored.", line[i]));
118+
Ref<Translation> translation;
119+
translation.instantiate();
120+
translation->set_locale(locale);
121+
column_to_translation[i] = translation;
110122
}
111123

112-
locales.push_back(locale);
113-
Ref<Translation> translation;
114-
translation.instantiate();
115-
translation->set_locale(locale);
116-
translations.push_back(translation);
124+
ERR_FAIL_COND_V_MSG(column_to_translation.is_empty(), ERR_PARSE_ERROR, "Error importing CSV translation: The CSV file must have at least one column for key and one column for translation.");
117125
}
118126

119-
do {
120-
line = f->get_csv_line(delimiter);
121-
String key = line[0];
122-
if (!key.is_empty()) {
123-
ERR_CONTINUE_MSG(line.size() != locales.size() + (int)skipped_locales.size() + 1, vformat("Error importing CSV translation: expected %d locale(s), but the '%s' key has %d locale(s).", locales.size(), key, line.size() - 1));
127+
// Parse content rows.
128+
bool context_used = false;
129+
bool plural_used = false;
130+
{
131+
const bool unescape_keys = p_options.has("unescape_keys") ? bool(p_options["unescape_keys"]) : false;
132+
const bool unescape_translations = p_options.has("unescape_translations") ? bool(p_options["unescape_translations"]) : true;
133+
134+
bool reading_plural_rows = false;
135+
String plural_msgid;
136+
String plural_msgctxt;
137+
HashMap<int, Vector<String>> plural_msgstrs;
138+
139+
do {
140+
const Vector<String> line = f->get_csv_line(delimiter);
141+
142+
// Skip empty lines.
143+
if (line.size() == 1 && line[0].is_empty()) {
144+
continue;
145+
}
146+
147+
if (line[0].to_lower() == "?pluralrule") {
148+
for (int i = 1; i < line.size(); i++) {
149+
if (line[i].is_empty() || !column_to_translation.has(i)) {
150+
continue;
151+
}
152+
Ref<Translation> translation = column_to_translation[i];
153+
ERR_CONTINUE_MSG(!translation->get_plural_rules_override().is_empty(), vformat("Error importing CSV translation: Multiple '?pluralrule' definitions found for locale '%s'. Only one is allowed. Subsequent ones will be ignored.", translation->get_locale()));
154+
translation->set_plural_rules_override(line[i]);
155+
}
156+
continue;
157+
}
158+
159+
const String msgid = unescape_keys ? line[0].c_unescape() : line[0];
160+
if (!reading_plural_rows && msgid.is_empty()) {
161+
continue;
162+
}
163+
164+
// It's okay if you define context or plural columns but don't use them.
165+
const String msgctxt = (context_column != -1 && context_column < line.size()) ? line[context_column] : String();
166+
if (!msgctxt.is_empty()) {
167+
context_used = true;
168+
}
169+
const String msgid_plural = (plural_column != -1 && plural_column < line.size()) ? line[plural_column] : String();
170+
if (!msgid_plural.is_empty()) {
171+
plural_used = true;
172+
}
173+
174+
// End of plural rows.
175+
if (reading_plural_rows && (!msgid.is_empty() || !msgctxt.is_empty() || !msgid_plural.is_empty())) {
176+
reading_plural_rows = false;
177+
178+
for (KeyValue<int, Ref<Translation>> E : column_to_translation) {
179+
Ref<Translation> translation = E.value;
180+
const Vector<String> &msgstrs = plural_msgstrs[E.key];
181+
if (!msgstrs.is_empty()) {
182+
translation->add_plural_message(plural_msgid, msgstrs, plural_msgctxt);
183+
}
184+
}
185+
plural_msgstrs.clear();
186+
}
187+
188+
// Start of plural rows.
189+
if (!reading_plural_rows && !msgid_plural.is_empty()) {
190+
reading_plural_rows = true;
191+
plural_msgid = msgid;
192+
plural_msgctxt = msgctxt;
193+
}
124194

125-
int write_index = 0; // Keep track of translations written in case some locales are skipped.
126195
for (int i = 1; i < line.size(); i++) {
127-
if (skipped_locales.has(i)) {
196+
if (!column_to_translation.has(i)) {
128197
continue;
129198
}
130-
translations.write[write_index++]->add_message(key, line[i].c_unescape());
199+
const String msgstr = unescape_translations ? line[i].c_unescape() : line[i];
200+
if (msgstr.is_empty()) {
201+
continue;
202+
}
203+
if (reading_plural_rows) {
204+
plural_msgstrs[i].push_back(msgstr);
205+
} else {
206+
column_to_translation[i]->add_message(msgid, msgstr, msgctxt);
207+
}
208+
}
209+
} while (!f->eof_reached());
210+
211+
if (reading_plural_rows) {
212+
for (KeyValue<int, Ref<Translation>> E : column_to_translation) {
213+
Ref<Translation> translation = E.value;
214+
const Vector<String> &msgstrs = plural_msgstrs[E.key];
215+
if (!msgstrs.is_empty()) {
216+
translation->add_plural_message(plural_msgid, msgstrs, plural_msgctxt);
217+
}
131218
}
132219
}
133-
} while (!f->eof_reached());
220+
}
221+
222+
bool compress;
223+
switch ((int)p_options["compress"]) {
224+
case 0: { // Disabled.
225+
compress = false;
226+
} break;
227+
default: { // Auto.
228+
compress = !context_used && !plural_used;
229+
} break;
230+
}
134231

135-
for (int i = 0; i < translations.size(); i++) {
136-
Ref<Translation> xlt = translations[i];
232+
for (KeyValue<int, Ref<Translation>> E : column_to_translation) {
233+
Ref<Translation> xlt = E.value;
137234

138235
if (compress) {
139236
Ref<OptimizedTranslation> cxl = memnew(OptimizedTranslation);
140237
cxl->generate(xlt);
141238
xlt = cxl;
142239
}
143240

144-
String save_path = p_source_file.get_basename() + "." + translations[i]->get_locale() + ".translation";
145-
ResourceUID::ID save_id = hash64_murmur3_64(translations[i]->get_locale().hash64(), p_source_id) & 0x7FFFFFFFFFFFFFFF;
241+
String save_path = p_source_file.get_basename() + "." + xlt->get_locale() + ".translation";
242+
ResourceUID::ID save_id = hash64_murmur3_64(xlt->get_locale().hash64(), p_source_id) & 0x7FFFFFFFFFFFFFFF;
146243
bool uid_already_exists = ResourceUID::get_singleton()->has_id(save_id);
147244
if (uid_already_exists) {
148245
// Avoid creating a new file with a duplicate UID.

0 commit comments

Comments
 (0)