Skip to content

Commit 7e160a0

Browse files
committed
Setup web i18n scripts
0 parents  commit 7e160a0

File tree

5 files changed

+596
-0
lines changed

5 files changed

+596
-0
lines changed

.gitignore

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
node_modules
2+
/.npmrc
3+
/*.log
4+
package-lock.json
5+
.lock-wscript
6+
build/Release
7+
coverage
8+
lib-cov
9+
out
10+
/dist
11+
/lib

package.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
    "name": "matrix-web-i18n",
    "version": "1.0.0",
    "description": "Internationalisation utils for Matrix web projects",
    "keywords": [
        "i18n"
    ],
    "license": "Apache-2.0",
    "bin": {
        "matrix-gen-i18n": "scripts/gen-i18n.js",
        "matrix-prune-i18n": "scripts/prune-i18n.js"
    },
    "repository": {
        "type": "git",
        "url": "https://github.com/matrix-org/matrix-web-i18n"
    },
    "homepage": "https://github.com/matrix-org/matrix-web-i18n",
    "bugs": {
        "url": "https://github.com/matrix-org/matrix-web-i18n/issues"
    },
    "dependencies": {
        "@babel/parser": "^7.13.16",
        "@babel/traverse": "^7.13.17",
        "walk": "^2.3.14"
    },
    "devDependencies": {}
}

scripts/gen-i18n.js

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
#!/usr/bin/env node
2+
3+
/*
4+
Copyright 2017 New Vector Ltd
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
/**
20+
* Regenerates the translations en_EN file by walking the source tree and
21+
* parsing each file with the appropriate parser. Emits a JSON file with the
22+
* translatable strings mapped to themselves in the order they appeared
23+
* in the files and grouped by the file they appeared in.
24+
*
25+
* Usage: node scripts/gen-i18n.js
26+
*/
27+
const fs = require('fs');
28+
const path = require('path');
29+
30+
const walk = require('walk');
31+
32+
const parser = require("@babel/parser");
33+
const traverse = require("@babel/traverse");
34+
35+
// Names of the functions whose calls are scanned: the first argument of each
// call is treated as the translatable string.
const TRANSLATIONS_FUNCS = ['_t', '_td'];
36+
37+
// Existing translations file, read to recover the plural forms of pluralised
// strings. The path is relative, so it resolves against the working directory.
const INPUT_TRANSLATIONS_FILE = 'src/i18n/strings/en_EN.json';
38+
// File the regenerated strings are written to. It is the same path as the
// input file, so the input is overwritten in place.
const OUTPUT_FILE = 'src/i18n/strings/en_EN.json';
39+
40+
// Directories walked for translatable strings; any that do not exist are skipped.
// NB. The sync version of walk is broken for single files so we walk
41+
// all of res rather than just res/home.html.
42+
// https://git.daplie.com/Daplie/node-walk/merge_requests/1 fixes it,
43+
// or if we get bored waiting for it to be merged, we could switch
44+
// to a project that's actively maintained.
45+
const SEARCH_PATHS = ['src', 'res'];
46+
47+
/**
 * Looks up the value node of a named property in an object-literal AST node.
 *
 * @param {object} obj AST node to search. Anything that is not an object
 *     literal (no `properties` array) is treated as having no properties.
 * @param {string} key Property name to look for.
 * @returns {object|null} The `value` node of the first property whose key is
 *     the identifier `key` or the string literal `key`; null if there is none.
 */
function getObjectValue(obj, key) {
    // A variable, call expression or missing argument has no `properties`
    // to iterate: report "not found" instead of throwing a TypeError.
    if (!obj || !Array.isArray(obj.properties)) return null;

    for (const prop of obj.properties) {
        // Spread elements (`{...rest}`) carry an `argument`, not a `key`.
        if (!prop.key) continue;

        // `{[name]: x}` has an Identifier key too, but it names a variable
        // rather than the property, so computed keys must not match by name.
        const matchesIdentifier = !prop.computed &&
            prop.key.type === 'Identifier' && prop.key.name === key;
        // Quoted keys: `{'name': x}`.
        const matchesString =
            (prop.key.type === 'StringLiteral' || prop.key.type === 'Literal') &&
            prop.key.value === key;

        if (matchesIdentifier || matchesString) {
            return prop.value;
        }
    }
    return null;
}
55+
56+
/**
 * Statically evaluates the AST node of a translation call's first argument
 * into the translation key.
 *
 * Handles string literals, `+` concatenations of resolvable operands, and
 * template literals (whose raw quasis are joined; interpolated expressions
 * are dropped).
 *
 * @param {object} [arg] AST node of the argument; may be missing.
 * @returns {string|null} The key, or null when it cannot be determined
 *     statically.
 */
function getTKey(arg) {
    // e.g. `_t()` called with no arguments at all.
    if (!arg) return null;

    if (arg.type === 'Literal' || arg.type === 'StringLiteral') {
        return arg.value;
    }

    if (arg.type === 'BinaryExpression' && arg.operator === '+') {
        const left = getTKey(arg.left);
        const right = getTKey(arg.right);
        // If either side is not static the whole key is unknown. Plain
        // concatenation would coerce null into the text "null".
        if (left === null || right === null) return null;
        return left + right;
    }

    if (arg.type === 'TemplateLiteral') {
        return arg.quasis.map((q) => q.value.raw).join('');
    }

    return null;
}
68+
69+
/**
 * Collects the names of the `%(name)s` placeholders used in a translatable
 * string, rejecting anything after a `%` that is not a valid placeholder.
 *
 * @param {string} str The translatable string.
 * @returns {Set<string>} The placeholder names, in order of first appearance.
 * @throws {Error} If a `%` is followed by something other than `%` or a
 *     `(name)` group, or if the format character after the group is not `s`.
 */
function getFormatStrings(str) {
    // Match anything that starts with %
    // We could make a regex that matched the full placeholder, but this
    // would just not match invalid placeholders and so wouldn't help us
    // detect the invalid ones.
    // Also note that for simplicity, this just matches a % character and then
    // anything up to the next % character (or a single %, or end of string).
    const formatStringRe = /%([^%]+|%|$)/g;
    const formatStrings = new Set();

    let match;
    while ((match = formatStringRe.exec(str)) !== null) {
        const placeholder = match[1]; // Minus the leading '%'
        if (placeholder === '%') continue; // Literal % is %%

        // A successful match always yields both groups (the name and the
        // format character), so no further length check is needed.
        const placeholderMatch = placeholder.match(/^\((.*?)\)(.)/);
        if (placeholderMatch === null) {
            throw new Error(`Invalid format specifier: '${match[0]}'`);
        }
        const [, placeholderName, placeholderFormat] = placeholderMatch;

        if (placeholderFormat !== 's') {
            throw new Error(`'${placeholderFormat}' used as format character: you probably meant 's'`);
        }

        formatStrings.add(placeholderName);
    }

    return formatStrings;
}
103+
104+
/**
 * Extracts the translatable strings from a JavaScript/TypeScript source file.
 *
 * The file is parsed with @babel/parser and every call to one of
 * TRANSLATIONS_FUNCS is inspected. For `_t` calls the placeholders in the
 * string are checked against the substitutions object (second argument) and
 * quoted tag names in the tag map (third argument) are checked against the
 * string. On a failed check, or if parsing/traversal throws, the error is
 * printed and the process exits with status 1.
 *
 * A call whose second argument is an object literal with a `count` property
 * is treated as plural: `<key>|other` is recorded, plus one `<key>|<form>`
 * entry for every plural form already known in `enPlurals`.
 *
 * @param {string} file Path of the source file to read.
 * @returns {Set<string>} The translation keys found, in source order.
 */
function getTranslationsJs(file) {
    const contents = fs.readFileSync(file, { encoding: 'utf8' });

    const trs = new Set();

    try {
        const plugins = [
            // https://babeljs.io/docs/en/babel-parser#plugins
            "classProperties",
            "objectRestSpread",
            "throwExpressions",
            "exportDefaultFrom",
            "decorators-legacy",
        ];

        if (file.endsWith(".js") || file.endsWith(".jsx")) {
            // all JS is assumed to be flow or react
            plugins.push("flow", "jsx");
        } else if (file.endsWith(".ts")) {
            // TS can't use JSX unless it's a TSX file (otherwise angle casts fail)
            plugins.push("typescript");
        } else if (file.endsWith(".tsx")) {
            // When the file is a TSX file though, enable JSX parsing
            plugins.push("typescript", "jsx");
        }

        const babelParsed = parser.parse(contents, {
            allowImportExportEverywhere: true,
            errorRecovery: true,
            sourceFilename: file,
            tokens: true,
            plugins,
        });
        traverse.default(babelParsed, {
            enter: (p) => {
                const node = p.node;
                if (!p.isCallExpression() || !node.callee || !TRANSLATIONS_FUNCS.includes(node.callee.name)) {
                    return;
                }

                // A call without arguments has no key to extract.
                if (node.arguments.length === 0) return;

                const tKey = getTKey(node.arguments[0]);

                // This happens whenever we call _t with non-literals (ie. whenever we've
                // had to use a _td to compensate) so is expected.
                if (tKey === null) return;

                // check the format string against the args
                // We only check _t: _td has no args
                if (node.callee.name === '_t') {
                    try {
                        const placeholders = getFormatStrings(tKey);
                        for (const placeholder of placeholders) {
                            if (node.arguments.length < 2) {
                                throw new Error(`Placeholder found ('${placeholder}') but no substitutions given`);
                            }
                            const value = getObjectValue(node.arguments[1], placeholder);
                            if (value === null) {
                                throw new Error(`No value found for placeholder '${placeholder}'`);
                            }
                        }

                        // Validate tag replacements
                        if (node.arguments.length > 2) {
                            const tagMap = node.arguments[2];
                            for (const prop of tagMap.properties || []) {
                                // Spread elements have no key to validate.
                                if (!prop.key) continue;
                                // @babel/parser emits 'StringLiteral' for quoted keys
                                // ('Literal' only appears in ESTree-style ASTs), so
                                // accept both, as getTKey does.
                                if (prop.key.type !== 'Literal' && prop.key.type !== 'StringLiteral') continue;

                                const tag = prop.key.value;
                                // RegExp same as in src/languageHandler.js
                                const regexp = new RegExp(`(<${tag}>(.*?)<\\/${tag}>|<${tag}>|<${tag}\\s*\\/>)`);
                                if (!tKey.match(regexp)) {
                                    throw new Error(`No match for ${regexp} in ${tKey}`);
                                }
                            }
                        }
                    } catch (e) {
                        console.log();
                        console.error(`ERROR: ${file}:${node.loc.start.line} ${tKey}`);
                        console.error(e);
                        process.exit(1);
                    }
                }

                let isPlural = false;
                if (node.arguments.length > 1 && node.arguments[1].type === 'ObjectExpression') {
                    const countVal = getObjectValue(node.arguments[1], 'count');
                    if (countVal) {
                        isPlural = true;
                    }
                }

                if (isPlural) {
                    trs.add(tKey + "|other");
                    const plurals = enPlurals[tKey];
                    if (plurals) {
                        for (const pluralType of Object.keys(plurals)) {
                            trs.add(tKey + "|" + pluralType);
                        }
                    }
                } else {
                    trs.add(tKey);
                }
            },
        });
    } catch (e) {
        console.error(e);
        process.exit(1);
    }

    return trs;
}
214+
215+
/**
 * Extracts translatable strings from a non-JavaScript file (e.g. HTML) by
 * scanning its text for `_t('...')` / `_t("...")` occurrences.
 *
 * @param {string} file Path of the file to read.
 * @returns {Set<string>} The quoted strings found, in order of appearance.
 */
function getTranslationsOther(file) {
    const text = fs.readFileSync(file, { encoding: 'utf8' });
    const found = new Set();

    // Taken from element-web src/components/structures/HomePage.js
    const translationsRegex = /_t\(['"]([\s\S]*?)['"]\)/mg;
    for (let hit = translationsRegex.exec(text); hit; hit = translationsRegex.exec(text)) {
        found.add(hit[1]);
    }

    return found;
}
228+
229+
// Gather the en_EN plural strings from the input translations file:
// the en_EN strings are all in the source with the exception of
// pluralised strings, which we need to pull in from elsewhere.
// On a first run the file does not exist yet; start from an empty set of
// translations rather than crashing.
const inputTranslationsRaw = fs.existsSync(INPUT_TRANSLATIONS_FILE)
    ? JSON.parse(fs.readFileSync(INPUT_TRANSLATIONS_FILE, { encoding: 'utf8' }))
    : {};

// Maps a base string to its known plural forms: { [base]: { [form]: text } }.
// Created without a prototype because it is keyed by arbitrary strings; on a
// plain object a base such as "constructor" would resolve to an inherited
// built-in and be written to.
const enPlurals = Object.create(null);

for (const key of Object.keys(inputTranslationsRaw)) {
    // Plural entries are stored as "<base string>|<plural form>".
    const parts = key.split("|");
    if (parts.length > 1) {
        const plurals = enPlurals[parts[0]] || {};
        plurals[parts[1]] = inputTranslationsRaw[key];
        enPlurals[parts[0]] = plurals;
    }
}
243+
244+
// Every translatable string found so far, in the order it was encountered.
const translatables = new Set();

// Options for walk.walkSync: sort directory entries for a stable output
// order and extract the strings from each supported file.
const walkOpts = {
    listeners: {
        names: function(root, nodeNamesArray) {
            // Sort the names case insensitively and alphabetically to
            // maintain some sense of order between the different strings.
            nodeNamesArray.sort((a, b) => {
                a = a.toLowerCase();
                b = b.toLowerCase();
                if (a > b) return 1;
                if (a < b) return -1;
                return 0;
            });
        },
        file: function(root, fileStats, next) {
            const fullPath = path.join(root, fileStats.name);
            const name = fileStats.name;

            let trs;
            // .jsx is included because getTranslationsJs has explicit parser
            // settings for it; without it those files are silently skipped.
            if (['.js', '.jsx', '.ts', '.tsx'].some((ext) => name.endsWith(ext))) {
                trs = getTranslationsJs(fullPath);
            } else if (name.endsWith('.html')) {
                trs = getTranslationsOther(fullPath);
            } else {
                return;
            }
            console.log(`${fullPath} (${trs.size} strings)`);
            for (const tr of trs.values()) {
                // Convert DOS line endings to unix
                translatables.add(tr.replace(/\r\n/g, "\n"));
            }
        },
    }
};
278+
279+
// Walk every search path that exists. The loop variable is deliberately not
// called `path`, which would shadow the `path` module.
for (const searchPath of SEARCH_PATHS) {
    if (fs.existsSync(searchPath)) {
        walk.walkSync(searchPath, walkOpts);
    }
}

// Build the output map. Plain strings map to themselves. Keys containing "|"
// reuse the existing translation from the input file when there is one and
// otherwise fall back to the part before the "|".
const trObj = {};
for (const tr of translatables) {
    if (tr.includes("|")) {
        if (inputTranslationsRaw[tr]) {
            trObj[tr] = inputTranslationsRaw[tr];
        } else {
            trObj[tr] = tr.split("|")[0];
        }
    } else {
        trObj[tr] = tr;
    }
}

fs.writeFileSync(
    OUTPUT_FILE,
    // The replacer must be a function or an array: an iterator is silently
    // ignored. Passing the keys as an array emits them in discovery order.
    JSON.stringify(trObj, Array.from(translatables), 4) + "\n"
);

console.log();
console.log(`Wrote ${translatables.size} strings to ${OUTPUT_FILE}`);

0 commit comments

Comments
 (0)