|
| 1 | +import { DA_ORIGIN } from '../../../../public/utils/constants.js'; |
| 2 | +import { daFetch } from '../../../../utils/daFetch.js'; |
| 3 | + |
// Path of the block schema sheet; fetchBlockSchema appends it to
// `${DA_ORIGIN}/source/<org>/<site>` to build the request URL.
const BLOCK_SCHEMA_PATH = '/.da/block-schema.json';

// Holds the most recently parsed block schema so fetchBlockSchema can skip
// the network round-trip; stays undefined until a fetch succeeds.
let blockSchemaCache;
| 7 | + |
/**
 * Converts a schema key into a stable block id and a CSS class selector.
 *
 * A bare key such as `hero` maps to `{ id: 'hero', selector: '.hero' }`.
 * A key with variants such as `aso-app (listing, apple)` maps to
 * `{ id: 'aso-app_apple_listing', selector: '.aso-app.apple.listing' }`.
 * Variants are trimmed and sorted, so their order in the key is irrelevant.
 *
 * @param {string} schemaKey - Key as written in the schema / keywords sheet.
 * @returns {{id: string, selector: string}} Block id and matching selector.
 */
export function processSchemaKey(schemaKey) {
  const match = schemaKey.match(/^([\w-]+)\s*\((.*)\)$/);
  if (!match) {
    return {
      id: schemaKey,
      selector: `.${schemaKey}`,
    };
  }
  const [, blockType, classesStr] = match;
  // Empty entries ("hero ()", "hero (a,,b)") would otherwise produce ids such
  // as "hero_" and invalid selectors such as ".hero." that make
  // querySelectorAll throw, so they are dropped before joining.
  const classes = classesStr
    .split(',')
    .map((c) => c.trim())
    .filter((c) => c !== '')
    .sort();
  const parts = [blockType, ...classes];
  return {
    id: parts.join('_'),
    selector: `.${parts.join('.')}`,
  };
}
| 23 | + |
// Memoizes fieldNameToKey results, keyed by the field name exactly as passed in.
const fieldKeyCache = new Map();

/**
 * Turns a human-readable field name into a lowercase, hyphen-separated key,
 * e.g. `Promo Text (Short)!` becomes `promo-text-short`.
 *
 * Surrounding whitespace is ignored: isExactMatch trims field names before
 * comparing them, so ` Title ` and `Title` have to yield the same key here
 * instead of `-title-` versus `title`.
 *
 * @param {string} fieldName - Field name from the schema or keywords sheet.
 * @returns {string} Normalized key.
 */
export function fieldNameToKey(fieldName) {
  const cached = fieldKeyCache.get(fieldName);
  if (cached !== undefined) return cached;
  const key = fieldName
    .trim()
    .toLowerCase()
    .replace(/[^\w\s-]/g, '') // Remove special chars except word chars, spaces, hyphens
    .replace(/\s+/g, '-') // Replace whitespace runs with hyphens
    .replace(/-+/g, '-'); // Collapse multiple hyphens
  fieldKeyCache.set(fieldName, key);
  return key;
}
| 37 | + |
/**
 * Looks up the code of the project language whose name matches
 * `languageName`, ignoring case and surrounding whitespace.
 *
 * @param {string} languageName - Language name, e.g. `French`.
 * @param {Array<{name?: string, code: string}>} projectLangs - Known languages.
 * @returns {string|null} The matching language's `code`, or null when the
 *   inputs are unusable or no language matches.
 */
export function languageNameToCode(languageName, projectLangs) {
  // Sheet data is not guaranteed to be well-formed; treat bad input as "no match".
  if (typeof languageName !== 'string' || !Array.isArray(projectLangs)) return null;
  const normalizedName = languageName.trim().toLowerCase();
  const lang = projectLangs.find(
    (l) => typeof l?.name === 'string' && l.name.trim().toLowerCase() === normalizedName,
  );
  return lang ? lang.code : null;
}
| 43 | + |
/**
 * Reduces the raw block schema sheet to the fields that need annotation.
 *
 * Keys starting with `:` and entries without a `data` array are ignored.
 * A field is kept when it has a non-blank `character count` or when its
 * `keywords injection` cell is `yes` / `true` (case-insensitive). Blocks
 * without any such field are omitted from the result.
 *
 * Cell values are coerced to strings and trimmed, so numeric counts,
 * boolean flags and padded values such as `' yes '` are handled instead of
 * throwing or being missed.
 *
 * @param {Object} schemaData - Parsed JSON of the block schema sheet.
 * @returns {Object<string, {selector: string, fields: Array<{fieldName: string,
 *   fieldKey: string, charCount: string, keywordsInjection: boolean}>}>}
 *   Parsed schema keyed by block id.
 */
export function parseBlockSchema(schemaData) {
  const parsedSchema = {};
  if (!schemaData || typeof schemaData !== 'object') return parsedSchema;

  Object.entries(schemaData).forEach(([key, blockData]) => {
    if (key.startsWith(':')) return;
    if (!Array.isArray(blockData?.data)) return;

    const fields = [];
    blockData.data.forEach((field) => {
      const fieldName = field?.['field name'];
      if (typeof fieldName !== 'string' || fieldName.trim() === '') return;

      const charCount = String(field['character count'] ?? '').trim();
      const keywordsFlag = String(field['keywords injection'] ?? '').trim().toLowerCase();
      const keywordsInjection = keywordsFlag === 'yes' || keywordsFlag === 'true';
      if (charCount === '' && !keywordsInjection) return;

      fields.push({
        fieldName,
        fieldKey: fieldNameToKey(fieldName),
        charCount,
        keywordsInjection,
      });
    });
    if (fields.length === 0) return;

    const { id, selector } = processSchemaKey(key);
    parsedSchema[id] = { selector, fields };
  });
  return parsedSchema;
}
| 79 | + |
// Records which org/site the schema held in blockSchemaCache was fetched for,
// so a cached schema is never handed out for a different site.
let blockSchemaCacheKey;

/**
 * Fetches and parses the block schema sheet for a site.
 *
 * The parsed schema is cached; a later call for the same org/site returns the
 * cached object without another request. Calling with a different org/site,
 * or with `reset: true`, fetches again.
 *
 * @param {string} org - Organization name.
 * @param {string} site - Site name.
 * @param {{reset?: boolean}} [options] - Pass `reset: true` to bypass the cache.
 * @returns {Promise<Object|null>} Parsed schema, or null when the response is
 *   not ok or the fetch/parse fails (the failure is logged).
 */
export async function fetchBlockSchema(org, site, { reset = false } = {}) {
  const cacheKey = `${org}/${site}`;
  if (!reset && blockSchemaCache && blockSchemaCacheKey === cacheKey) {
    return blockSchemaCache;
  }
  const url = `${DA_ORIGIN}/source/${org}/${site}${BLOCK_SCHEMA_PATH}`;
  try {
    const resp = await daFetch(url);
    if (!resp.ok) return null;
    const schemaData = await resp.json();
    const parsedSchema = parseBlockSchema(schemaData);
    blockSchemaCache = parsedSchema;
    blockSchemaCacheKey = cacheKey;
    return parsedSchema;
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error fetching block schema:', error);
    return null;
  }
}
| 96 | + |
/**
 * Tells whether any field of any block in the parsed schema has keywords
 * injection switched on.
 *
 * @param {Object} parsedSchema - Parsed schema keyed by block id; each value
 *   carries a `fields` array.
 * @returns {boolean} True when at least one field has a truthy
 *   `keywordsInjection`; false for a missing or empty schema.
 */
export function needsKeywordsMetadata(parsedSchema) {
  if (!parsedSchema) return false;
  for (const block of Object.values(parsedSchema)) {
    const wantsKeywords = block.fields.some((field) => field.keywordsInjection);
    if (wantsKeywords) return true;
  }
  return false;
}
| 102 | + |
/**
 * Fetches the keywords sheet that accompanies a page.
 *
 * The sheet path is the page path with a trailing `.html` removed and
 * `-keywords.json` appended. When that request answers 404 and the path
 * contains `/langstore/`, the first `/langstore/<segment>/` portion is
 * collapsed to `/` and the resulting path is tried once.
 *
 * @param {string} org - Organization name.
 * @param {string} site - Site name.
 * @param {string} pagePath - Page path, starting with `/`.
 * @returns {Promise<Object|null>} Parsed JSON, or null when no sheet is found
 *   or anything fails (failures are logged).
 */
export async function fetchKeywordsFile(org, site, pagePath) {
  try {
    // Remove .html extension if present and add -keywords.json
    const keywordsPath = `${pagePath.replace(/\.html$/, '')}-keywords.json`;
    const base = `${DA_ORIGIN}/source/${org}/${site}`;

    const resp = await daFetch(`${base}${keywordsPath}`);
    // The json() promises are awaited inside the try block on purpose: a
    // bare `return resp.json()` would let a parse failure bypass the catch
    // below and reject the caller instead of resolving to null.
    if (resp.ok) return await resp.json();

    if (resp.status !== 404 || !keywordsPath.includes('/langstore/')) return null;

    const fallbackPath = keywordsPath.replace(/\/langstore\/[^/]+\//, '/');
    const fallbackResp = await daFetch(`${base}${fallbackPath}`);
    return fallbackResp.ok ? await fallbackResp.json() : null;
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error fetching keywords file:', error);
    return null;
  }
}
| 130 | + |
/**
 * Unwraps single <p> tags from block row divs
 * Converts <div><p>Text</p></div> to <div>Text</div>
 * Multi-paragraph content is preserved, and so is any cell that has
 * non-whitespace text next to its <p>: replacing the cell's children with
 * the paragraph's would silently delete that text.
 * @param {Document} doc - Parsed HTML document
 */
function unwrapSoleParagraphs(doc) {
  const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE
  doc.querySelectorAll('div[class] > div > div').forEach((div) => {
    if (div.children.length !== 1) return;
    const pTag = div.children[0];
    if (pTag.tagName !== 'P') return;
    // `children` only counts elements, so check the text nodes separately.
    const hasLooseText = Array.from(div.childNodes).some(
      (node) => node.nodeType === TEXT_NODE && node.textContent.trim() !== '',
    );
    if (hasLooseText) return;
    div.replaceChildren(...pTag.childNodes);
  });
}
| 145 | + |
/**
 * Check if a div contains exactly the field name (with or without <p> wrapper)
 * Returns true only if:
 * - <div><p>Field Name</p></div> (and nothing else)
 * - <div>Field Name</div> (and nothing else)
 * Resilient to both unwrapped and wrapped content; surrounding whitespace is
 * ignored on both sides of the comparison.
 * @param {Element} div - Candidate label cell
 * @param {string} fieldName - Field name to compare against
 * @returns {boolean} Whether the cell holds the field name and nothing else
 */
function isExactMatch(div, fieldName) {
  const trimmedFieldName = fieldName.trim();
  const { children } = div;
  // Multiple child elements can never be a plain label.
  if (children.length > 1) return false;
  if (children.length === 1) {
    // The only element allowed is a <p> that itself holds nothing but text.
    const wrapper = children[0];
    if (wrapper.tagName !== 'P' || wrapper.children.length !== 0) return false;
    if (wrapper.textContent.trim() !== trimmedFieldName) return false;
  }
  // Comparing the whole cell's text also rejects loose text sitting beside
  // the <p>, which the element-only `children` check cannot see.
  return div.textContent.trim() === trimmedFieldName;
}
| 166 | + |
/**
 * Parses the HTML, unwraps sole paragraphs in block cells and tags the
 * content cell of every schema-listed field with `its-*` attributes.
 *
 * For each element matching a block's selector, every direct child row whose
 * first two children are both DIVs is inspected: when the first cell is an
 * exact match for a schema field name, the second cell receives
 * `its-storage-size` (only if the field has a char count), `its-loc-note`
 * and `its-loc-note-type`. Block numbering inside the note is 1-based and
 * counted per schema block.
 *
 * @param {string} htmlContent - HTML to annotate; falsy input is returned as is.
 * @param {Object} parsedSchema - Parsed schema keyed by block id.
 * @returns {string} The body's inner HTML after processing.
 */
export function annotateHTML(htmlContent, parsedSchema) {
  if (!htmlContent) return htmlContent;

  const doc = new DOMParser().parseFromString(htmlContent, 'text/html');
  unwrapSoleParagraphs(doc);

  // A missing schema simply means there is nothing to iterate over.
  const schemaEntries = parsedSchema ? Object.entries(parsedSchema) : [];
  for (const [blockId, { selector, fields }] of schemaEntries) {
    let blockNumber = 0;
    for (const blockElement of doc.querySelectorAll(selector)) {
      blockNumber += 1;
      for (const row of blockElement.querySelectorAll(':scope > div')) {
        const labelDiv = row.children[0];
        const contentDiv = row.children[1];
        const isLabelValuePair = Boolean(labelDiv) && Boolean(contentDiv)
          && labelDiv.tagName === 'DIV' && contentDiv.tagName === 'DIV';
        if (!isLabelValuePair) continue;

        const field = fields.find((candidate) => isExactMatch(labelDiv, candidate.fieldName));
        if (!field) continue;

        if (field.charCount) {
          contentDiv.setAttribute('its-storage-size', field.charCount);
        }
        const note = [
          `block-name=${blockId}_${blockNumber}_${field.fieldKey}`,
          `fieldName=${field.fieldName}`,
          `apply-keywords=${String(field.keywordsInjection)}`,
        ].join('|');
        contentDiv.setAttribute('its-loc-note', note);
        contentDiv.setAttribute('its-loc-note-type', 'description');
      }
    }
  }

  return doc.body.innerHTML;
}
| 204 | + |
/**
 * Builds per-language keyword metadata from a keywords sheet.
 *
 * Sheet keys look like `aso-app (apple, listing) (1)`: a schema key followed
 * by the block's index in parentheses. Keys starting with `:`, keys without
 * a trailing index and entries without a `data` array are skipped. Each row
 * names a language in its `language` property; every other non-blank string
 * property becomes a `keywords|<blockId>_<index>_<fieldKey>` entry under
 * that language's code. Rows for languages that are not in `langs`, and
 * malformed rows or values (null rows, non-string cells), are skipped rather
 * than aborting the whole sheet.
 *
 * @param {Object} keywordsData - Parsed JSON of the keywords sheet.
 * @param {Array<{name: string, code: string}>} langs - Target languages.
 * @returns {Object<string, Object<string, string>>} Metadata keyed by
 *   language code; empty when inputs are missing.
 */
export function buildLanguageMetadata(keywordsData, langs) {
  if (!keywordsData || !Array.isArray(langs)) return {};

  const targetLangCodes = new Set(langs.map((lang) => lang.code));
  // Resolve each language name once per call; names repeat across blocks.
  const langCodeByName = new Map();
  const getLangCode = (languageName) => {
    const normalizedName = languageName.toLowerCase();
    if (!langCodeByName.has(normalizedName)) {
      langCodeByName.set(normalizedName, languageNameToCode(languageName, langs));
    }
    return langCodeByName.get(normalizedName);
  };

  const langMetadata = {};
  Object.entries(keywordsData).forEach(([key, blockData]) => {
    if (key.startsWith(':') || !Array.isArray(blockData?.data)) return;
    // Parse the key: "aso-app (apple, listing) (1)" -> blockId + index
    const indexMatch = key.match(/\((\d+)\)$/);
    if (!indexMatch) return;
    const index = indexMatch[1];
    const blockKeyWithoutIndex = key.replace(/\s*\(\d+\)$/, '').trim();
    const { id: blockId } = processSchemaKey(blockKeyWithoutIndex);

    // Process each language entry
    blockData.data.forEach((entry) => {
      const languageName = entry?.language;
      if (typeof languageName !== 'string' || !languageName) return;
      const langCode = getLangCode(languageName);
      if (!langCode || !targetLangCodes.has(langCode)) return;
      if (!langMetadata[langCode]) {
        langMetadata[langCode] = {};
      }
      Object.entries(entry).forEach(([fieldName, keywordValue]) => {
        if (fieldName === 'language') return;
        // Only non-blank strings are usable keywords.
        if (typeof keywordValue !== 'string' || !keywordValue.trim()) return;
        const fieldKey = fieldNameToKey(fieldName);
        langMetadata[langCode][`keywords|${blockId}_${index}_${fieldKey}`] = keywordValue;
      });
    });
  });

  return langMetadata;
}
| 249 | + |
/**
 * Enriches URL objects for translation: annotates their HTML against the
 * site's block schema and, when the schema asks for keywords, attaches
 * per-language keyword metadata.
 * Mutates each entry in place (`content`, `translationMetadata`); does
 * nothing at all when no block schema is available.
 * @param {string} org - Organization name
 * @param {string} site - Site name
 * @param {Array} langs - Array of language objects with .name and .code
 * @param {Array} urls - Array of URL objects with .content and .suppliedPath
 * @returns {Promise<void>} Settles once every URL has been processed
 */
export async function addTranslationMetadata(org, site, langs, urls) {
  const blockSchema = await fetchBlockSchema(org, site);
  if (!blockSchema) return;

  const wantsKeywords = needsKeywordsMetadata(blockSchema);

  // Handles a single URL entry; all entries are processed concurrently below.
  const enrich = async (url) => {
    const hasHtml = typeof url.content === 'string' && url.content !== '';
    if (hasHtml) {
      url.content = annotateHTML(url.content, blockSchema);
    }
    if (!wantsKeywords) return;

    const keywordsData = await fetchKeywordsFile(org, site, url.suppliedPath);
    if (!keywordsData) return;

    const langMetadata = buildLanguageMetadata(keywordsData, langs);
    const isEmpty = !langMetadata || Object.keys(langMetadata).length === 0;
    if (isEmpty) return;
    url.translationMetadata = langMetadata;
  };

  await Promise.all(urls.map((url) => enrich(url)));
}
0 commit comments