Skip to content

Commit 0cbbfa0

Browse files
Translation Metadata addition to GLaaS (#189)
- HTML annotation for maxlength and keywords based on block-schema (its-storage-size, its-loc-note, its-loc-note-type)
- Per-language keywords metadata (langMetadata) sent to GLaaS
- Single <p> tag unwrapping for cleaner HTML structure
1 parent f51cc91 commit 0cbbfa0

File tree

6 files changed

+1293
-2
lines changed

6 files changed

+1293
-2
lines changed

nx/blocks/loc/connectors/glaas/api.js

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,21 @@ export async function addAssets({
119119
assetName: glaasFilename,
120120
assetType: 'SOURCE',
121121
targetLocales,
122-
metadata: { 'source-preview-url': item.aemHref.replace(/\/index$/, '/') },
123122
};
124123

125124
// GLaaS v1.2
126125
body.append('file', file, glaasFilename);
127126

127+
const assetMetadata = {
128+
assetName: glaasFilename,
129+
metadata: { 'source-preview-url': item.aemHref.replace(/\/index$/, '/') },
130+
...(item.translationMetadata && { langMetadata: item.translationMetadata }),
131+
};
132+
body.append('_asset_metadata_', new Blob(
133+
[JSON.stringify(assetMetadata)],
134+
{ type: 'application/json' },
135+
));
136+
128137
const opts = getOpts(clientid, token, body, null, 'POST');
129138
// Add fileDetails parameter for GLaaS v1.2
130139
const url = `${origin}/api/l10n/v1.2/tasks/${workflow}/${name}/assets?targetLanguages=${targetLocales.join(',')}&fileDetails=${encodeURIComponent(JSON.stringify(fileDetails))}`;

nx/blocks/loc/connectors/glaas/index.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { getGlaasToken, connectToGlaas } from './auth.js';
77
import { addDnt, removeDnt } from './dnt.js';
88
import { groupUrlsByWorkflow } from './locPageRules.js';
99
import { fetchConfig } from '../../utils/utils.js';
10+
import { addTranslationMetadata } from './translationMetadata.js';
1011

1112
function determineStatus(translation) {
1213
if (translation.error > 0) return 'failed';
@@ -282,7 +283,7 @@ export async function sendAllLanguages({
282283
}) {
283284
const timestamp = Date.now();
284285
const tasks = await getTasks(org, site, title, langs, urls, timestamp);
285-
286+
await addTranslationMetadata(org, site, langs, urls);
286287
for (const key of Object.keys(tasks)) {
287288
await sendTask(service, tasks[key], urls, actions);
288289
}
nx/blocks/loc/connectors/glaas/translationMetadata.js

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
import { DA_ORIGIN } from '../../../../public/utils/constants.js';
2+
import { daFetch } from '../../../../utils/daFetch.js';
3+
4+
const BLOCK_SCHEMA_PATH = '/.da/block-schema.json';
5+
6+
let blockSchemaCache;
7+
8+
/**
 * Convert a block-schema sheet key into a block id and a CSS selector.
 *
 * A key is either a bare block name ("hero") or a block name followed by a
 * parenthesised, comma-separated list of variant classes
 * ("aso-app (apple, listing)"). Variant classes are sorted so that the same
 * set of classes always produces the same id regardless of authoring order.
 *
 * @param {string} schemaKey - Key as written in the schema sheet
 * @returns {{id: string, selector: string}} Block id and matching CSS selector
 */
export function processSchemaKey(schemaKey) {
  // Surrounding whitespace must not leak into the id or stop the pattern matching.
  const key = schemaKey.trim();
  const match = key.match(/^([\w-]+)\s*\((.*)\)$/);
  if (!match) {
    return {
      id: key,
      selector: `.${key}`,
    };
  }
  const [, blockType, classesStr] = match;
  // Drop empty entries ("hero ()", "hero (dark, )") — they would otherwise
  // produce an id with a dangling "_" and an invalid selector such as ".hero.".
  const classes = classesStr
    .split(',')
    .map((c) => c.trim())
    .filter(Boolean)
    .sort();
  if (classes.length === 0) {
    return {
      id: blockType,
      selector: `.${blockType}`,
    };
  }
  return {
    id: `${blockType}_${classes.join('_')}`,
    selector: `.${blockType}.${classes.join('.')}`,
  };
}
23+
24+
// Memoizes fieldNameToKey results; keyed by the raw (unnormalized) field name.
const fieldKeyCache = new Map();

/**
 * Normalize a human-readable field name into a lowercase, hyphenated key.
 *
 * The same function is applied to field names from the block schema and from
 * the keywords file, so both sides must normalize identically. Whitespace left
 * at either end (from the sheet cell itself or from stripped punctuation) is
 * removed before hyphenation so it cannot turn into a stray leading/trailing
 * hyphen.
 *
 * @param {string} fieldName - Field name as authored
 * @returns {string} Normalized key, e.g. "What's New?" -> "whats-new"
 */
export function fieldNameToKey(fieldName) {
  const cached = fieldKeyCache.get(fieldName);
  if (cached !== undefined) return cached;
  const key = fieldName
    .toLowerCase()
    .replace(/[^\w\s-]/g, '') // Remove special chars except word chars, spaces, hyphens
    .trim() // Drop edge whitespace so it does not become a leading/trailing hyphen
    .replace(/\s+/g, '-') // Replace spaces with hyphens
    .replace(/-+/g, '-'); // Collapse multiple hyphens
  fieldKeyCache.set(fieldName, key);
  return key;
}
37+
38+
/**
 * Look up a project language code by its display name (case-insensitive).
 *
 * @param {string} languageName - Language name to resolve
 * @param {Array<{name?: string, code: string}>} projectLangs - Project languages
 * @returns {string|null} Code of the first language whose name matches, else null
 */
export function languageNameToCode(languageName, projectLangs) {
  const wanted = languageName.toLowerCase();
  for (const candidate of projectLangs) {
    if (candidate.name?.toLowerCase() === wanted) return candidate.code;
  }
  return null;
}
43+
44+
/**
 * Reduce the raw block-schema JSON to the fields that carry translation
 * metadata (a character count and/or a keywords-injection flag).
 *
 * Keys starting with ":" and entries without a `data` array are skipped.
 * Cell values are coerced to strings before inspection so that a numeric
 * character count or a boolean flag does not throw and discard the schema.
 *
 * @param {Object} schemaData - Parsed block-schema JSON
 * @returns {Object<string, {selector: string, fields: Array<{
 *   fieldName: string, fieldKey: string, charCount: string, keywordsInjection: boolean
 * }>}>} Schema keyed by block id; blocks with no relevant fields are omitted
 */
export function parseBlockSchema(schemaData) {
  const parsedSchema = {};

  Object.entries(schemaData).forEach(([key, blockData]) => {
    if (key.startsWith(':')) return;
    if (!Array.isArray(blockData?.data)) return;
    const { id, selector } = processSchemaKey(key);
    const fields = [];
    blockData.data.forEach((field) => {
      const rawName = field?.['field name'];
      if (!rawName) return;
      const fieldName = String(rawName);
      // Sheet cells may arrive as numbers/booleans; normalize before testing.
      const charCount = String(field['character count'] ?? '').trim();
      const keywordsFlag = String(field['keywords injection'] ?? '').trim().toLowerCase();
      const hasCharCount = charCount !== '';
      const hasKeywordsInjection = keywordsFlag === 'yes' || keywordsFlag === 'true';
      if (!hasCharCount && !hasKeywordsInjection) return;
      fields.push({
        fieldName,
        fieldKey: fieldNameToKey(fieldName),
        charCount,
        keywordsInjection: hasKeywordsInjection,
      });
    });
    if (fields.length > 0) {
      parsedSchema[id] = {
        selector,
        fields,
      };
    }
  });
  return parsedSchema;
}
79+
80+
// Records which org/site the cached schema belongs to, so a schema fetched for
// one site is never served for another.
let blockSchemaCacheKey;

/**
 * Fetch and parse the block schema for a site, caching the parsed result.
 *
 * The cache holds a single entry; requesting a different org/site, or passing
 * `reset: true`, triggers a new fetch. Failed fetches are not cached.
 *
 * @param {string} org - Organization name
 * @param {string} site - Site name
 * @param {{reset?: boolean}} [options] - Pass `reset: true` to bypass the cache
 * @returns {Promise<Object|null>} Parsed schema, or null when the schema is
 *   missing or could not be fetched/parsed
 */
export async function fetchBlockSchema(org, site, { reset = false } = {}) {
  const cacheKey = `${org}/${site}`;
  if (blockSchemaCache && blockSchemaCacheKey === cacheKey && !reset) return blockSchemaCache;
  const url = `${DA_ORIGIN}/source/${org}/${site}${BLOCK_SCHEMA_PATH}`;
  try {
    const resp = await daFetch(url);
    if (!resp.ok) return null;
    const schemaData = await resp.json();
    const parsedSchema = parseBlockSchema(schemaData);
    blockSchemaCache = parsedSchema;
    blockSchemaCacheKey = cacheKey;
    return parsedSchema;
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error fetching block schema:', error);
    return null;
  }
}
96+
97+
/**
 * Tell whether any field in the parsed schema has keywords injection enabled.
 *
 * @param {Object} parsedSchema - Parsed schema keyed by block id
 * @returns {boolean} true if at least one field has a truthy `keywordsInjection`
 */
export function needsKeywordsMetadata(parsedSchema) {
  if (!parsedSchema) return false;
  for (const block of Object.values(parsedSchema)) {
    if (block.fields.some((field) => field.keywordsInjection)) return true;
  }
  return false;
}
102+
103+
/**
 * Fetch the "<page>-keywords.json" file that sits next to a page.
 *
 * If the primary path returns 404 and contains a "/langstore/<lang>/" segment,
 * the same path with that segment removed is tried as a fallback.
 *
 * @param {string} org - Organization name
 * @param {string} site - Site name
 * @param {string} pagePath - Page path, with or without a ".html" extension
 * @returns {Promise<Object|null>} Parsed keywords JSON, or null when the file
 *   is missing, unreadable, or not valid JSON
 */
export async function fetchKeywordsFile(org, site, pagePath) {
  // Without a path there is nothing to look up; do not let .replace() throw.
  if (typeof pagePath !== 'string' || pagePath === '') return null;
  // Remove .html extension if present and add -keywords.json
  const cleanPath = pagePath.replace(/\.html$/, '');
  const keywordsPath = `${cleanPath}-keywords.json`;
  try {
    // Try primary path
    const resp = await daFetch(`${DA_ORIGIN}/source/${org}/${site}${keywordsPath}`);
    if (resp.ok) {
      // Await here so a JSON parse failure is handled by the catch below
      // instead of rejecting the caller's promise.
      return await resp.json();
    }
    // If 404 and path contains /langstore/, try fallback
    if (resp.status === 404 && keywordsPath.includes('/langstore/')) {
      const fallbackPath = keywordsPath.replace(/\/langstore\/[^/]+\//, '/');
      const fallbackResp = await daFetch(`${DA_ORIGIN}/source/${org}/${site}${fallbackPath}`);
      if (fallbackResp.ok) {
        return await fallbackResp.json();
      }
    }
    return null;
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error fetching keywords file:', error);
    return null;
  }
}
130+
131+
/**
 * Unwraps single <p> tags from block row divs
 * Converts <div><p>Text</p></div> to <div>Text</div>
 * Multi-paragraph content is preserved
 * Cells that also hold non-whitespace text next to the <p> are left untouched,
 * because replacing the cell's children would delete that text.
 * @param {Document} doc - Parsed HTML document (modified in place)
 */
function unwrapSoleParagraphs(doc) {
  doc.querySelectorAll('div[class] > div > div').forEach((div) => {
    if (div.children.length !== 1) return;
    const pTag = div.children[0];
    if (pTag.tagName !== 'P') return;
    // `children` only counts elements; check text nodes separately.
    // nodeType 3 === Node.TEXT_NODE
    const hasLooseText = [...div.childNodes].some(
      (node) => node.nodeType === 3 && node.textContent.trim() !== '',
    );
    if (hasLooseText) return;
    div.replaceChildren(...pTag.childNodes);
  });
}
145+
146+
/**
 * Decide whether a label cell holds exactly the given field name.
 * Accepted shapes (text compared after trimming both sides):
 * - <div>Field Name</div>         (no child elements)
 * - <div><p>Field Name</p></div>  (one <p> with no child elements)
 * Any other structure is treated as a non-match.
 */
function isExactMatch(div, fieldName) {
  const expected = fieldName.trim();
  const { children } = div;
  // Bare text cell
  if (children.length === 0) return div.textContent.trim() === expected;
  // More than one child element can never be an exact label
  if (children.length > 1) return false;
  // Exactly one child: it must be a text-only <p>
  const only = children[0];
  if (only.tagName !== 'P' || only.children.length !== 0) return false;
  return only.textContent.trim() === expected;
}
166+
167+
/**
 * Annotate block content cells with translation metadata attributes.
 *
 * The HTML is parsed, sole <p> wrappers in block cells are unwrapped, and for
 * every block matched by a schema selector each row whose first cell is exactly
 * a schema field name gets, on its second cell:
 * - `its-storage-size` (only when the field has a character count)
 * - `its-loc-note` = "block-name=<blockId>_<n>_<fieldKey>|fieldName=<name>|apply-keywords=<bool>"
 *   where <n> is the 1-based position of the block among the selector's matches
 * - `its-loc-note-type` = "description"
 *
 * A schema entry whose selector is not valid CSS is logged and skipped so it
 * cannot abort annotation of the remaining blocks.
 *
 * @param {string} htmlContent - Page HTML
 * @param {Object} parsedSchema - Parsed schema keyed by block id
 * @returns {string} Serialized body HTML; falsy input is returned unchanged
 */
export function annotateHTML(htmlContent, parsedSchema) {
  if (!htmlContent) {
    return htmlContent;
  }
  const parser = new DOMParser();
  const doc = parser.parseFromString(htmlContent, 'text/html');
  unwrapSoleParagraphs(doc);
  if (!parsedSchema || Object.keys(parsedSchema).length === 0) {
    return doc.body.innerHTML;
  }
  Object.entries(parsedSchema).forEach(([blockId, block]) => {
    const { selector, fields } = block;
    let blockElements;
    try {
      blockElements = doc.querySelectorAll(selector);
    } catch (error) {
      // querySelectorAll throws on selectors that are not valid CSS
      // eslint-disable-next-line no-console
      console.error(`Skipping block schema entry with invalid selector "${selector}":`, error);
      return;
    }
    blockElements.forEach((blockElement, blockIndex) => {
      const rows = blockElement.querySelectorAll(':scope > div');
      rows.forEach((row) => {
        // A row is expected to be a label cell followed by a content cell.
        const labelDiv = row.children[0];
        const contentDiv = row.children[1];
        if (!labelDiv || !contentDiv || labelDiv.tagName !== 'DIV' || contentDiv.tagName !== 'DIV') {
          return;
        }
        const field = fields.find((f) => isExactMatch(labelDiv, f.fieldName));
        if (!field) return;
        const { fieldName, fieldKey, charCount, keywordsInjection } = field;
        if (charCount) {
          contentDiv.setAttribute('its-storage-size', charCount);
        }
        const keywordsValue = String(keywordsInjection);
        const locNoteValue = `block-name=${blockId}_${blockIndex + 1}_${fieldKey}|fieldName=${fieldName}|apply-keywords=${keywordsValue}`;
        contentDiv.setAttribute('its-loc-note', locNoteValue);
        contentDiv.setAttribute('its-loc-note-type', 'description');
      });
    });
  });

  return doc.body.innerHTML;
}
204+
205+
/**
 * Build per-language keyword metadata from a page's keywords file.
 *
 * Each keywords-file key has the form "<block key> (<index>)", e.g.
 * "aso-app (apple, listing) (1)". For every row whose `language` resolves to
 * one of the target languages, each non-empty column becomes an entry
 * `keywords|<blockId>_<index>_<fieldKey>` under that language's code.
 *
 * The index is normalized to a plain integer so "(01)" lines up with the
 * 1-based index written into the HTML annotation. Non-string cell values are
 * stringified and malformed rows are skipped rather than throwing.
 *
 * @param {Object} keywordsData - Parsed keywords JSON
 * @param {Array<{name: string, code: string}>} langs - Target languages
 * @returns {Object<string, Object<string, string>>} Metadata keyed by language
 *   code; empty object when either argument is missing
 */
export function buildLanguageMetadata(keywordsData, langs) {
  if (!keywordsData || !langs) return {};
  const targetLangCodes = new Set(langs.map((lang) => lang.code));
  // Per-call memo of language name -> code lookups.
  const langCodeByName = new Map();
  const getLangCode = (languageName) => {
    const normalizedName = languageName.toLowerCase();
    let code = langCodeByName.get(normalizedName);
    if (code === undefined) {
      code = languageNameToCode(languageName, langs);
      langCodeByName.set(normalizedName, code);
    }
    return code;
  };
  const langMetadata = {};
  Object.entries(keywordsData).forEach(([key, blockData]) => {
    if (key.startsWith(':') || !Array.isArray(blockData?.data)) return;
    // Parse the key: "aso-app (apple, listing) (1)" -> blockId + index
    const indexMatch = key.match(/\((\d+)\)\s*$/);
    if (!indexMatch) return;
    const index = Number.parseInt(indexMatch[1], 10);
    const blockKeyWithoutIndex = key.replace(/\s*\(\d+\)\s*$/, '').trim();
    const { id: blockId } = processSchemaKey(blockKeyWithoutIndex);
    // Process each language entry
    blockData.data.forEach((entry) => {
      const languageName = typeof entry?.language === 'string' ? entry.language.trim() : '';
      if (!languageName) return;
      const langCode = getLangCode(languageName);
      if (!langCode || !targetLangCodes.has(langCode)) return;
      if (!langMetadata[langCode]) {
        langMetadata[langCode] = {};
      }
      Object.entries(entry).forEach(([fieldName, rawValue]) => {
        if (fieldName === 'language') return;
        if (rawValue === null || rawValue === undefined) return;
        const keywordValue = String(rawValue);
        if (!keywordValue.trim()) return;
        const fieldKey = fieldNameToKey(fieldName);
        const metadataKey = `keywords|${blockId}_${index}_${fieldKey}`;
        langMetadata[langCode][metadataKey] = keywordValue;
      });
    });
  });

  return langMetadata;
}
249+
250+
/**
 * Enrich every URL object with translation metadata before it is sent.
 * For each URL, string `content` is replaced by its annotated HTML and, when
 * the schema has keyword-enabled fields and a keywords file yields entries,
 * `translationMetadata` is set. URL objects are mutated in place.
 * Does nothing when no block schema is available.
 * @param {string} org - Organization name
 * @param {string} site - Site name
 * @param {Array} langs - Array of language objects with .name and .code
 * @param {Array} urls - Array of URL objects with .content and .suppliedPath
 */
export async function addTranslationMetadata(org, site, langs, urls) {
  const blockSchema = await fetchBlockSchema(org, site);
  if (!blockSchema) return;

  const wantsKeywords = needsKeywordsMetadata(blockSchema);

  // Per-URL work: annotate HTML, then attach keyword metadata when applicable.
  const enrich = async (url) => {
    if (url.content && typeof url.content === 'string') {
      url.content = annotateHTML(url.content, blockSchema);
    }
    if (!wantsKeywords) return;

    const keywordsData = await fetchKeywordsFile(org, site, url.suppliedPath);
    if (!keywordsData) return;

    const langMetadata = buildLanguageMetadata(keywordsData, langs);
    if (langMetadata && Object.keys(langMetadata).length > 0) {
      url.translationMetadata = langMetadata;
    }
  };

  // All URLs are processed concurrently.
  await Promise.all(urls.map((url) => enrich(url)));
}

0 commit comments

Comments
 (0)