|
| 1 | +'use client'; |
| 2 | +import React, { useState } from 'react'; |
| 3 | +import { loadAsync } from 'jszip'; |
| 4 | + |
/**
 * Reduces a raw marker path to a plain dotted field path.
 *
 * The rewrite rules run in a fixed order, each on the output of the previous
 * one: XML tags are dropped, bracketed index segments are dropped, whitespace
 * is removed, runs of dots collapse to a single dot, and finally one leading
 * and one trailing dot are trimmed.
 *
 * @param p - Raw marker text, possibly containing XML tags, indices or spaces.
 * @returns The normalised path (may be an empty string).
 */
function normalizePath(p: string): string {
  // Ordered [pattern, replacement] pairs; order matters because later rules
  // rely on the clean-up done by earlier ones (e.g. dots are collapsed before
  // the single leading/trailing dot is trimmed).
  const rules: Array<[RegExp, string]> = [
    // eslint-disable-next-line sonarjs/slow-regex
    [/<[^>]+>/g, ''], // XML tags embedded in the marker text
    // eslint-disable-next-line sonarjs/slow-regex
    [/\[[^\]]*]/g, ''], // bracketed segments, e.g. users[0].name -> users.name
    [/\s+/g, ''], // every whitespace run
    [/\.+/g, '.'], // consecutive dots become one
    [/^\./, ''], // a single leading dot
    [/\.$/, ''], // a single trailing dot
  ];

  let result = p;
  for (const [pattern, replacement] of rules) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
| 24 | + |
/**
 * Collects the data-field markers referenced by a DOCX template.
 *
 * The file is opened as a zip archive and every `.xml` entry whose name starts
 * with `word/` is scanned for `{...}` markers. Each captured marker is passed
 * through `normalizePath`; only results starting with `d.` are kept, and that
 * prefix is removed before the marker is recorded.
 *
 * @param templateSource - The DOCX file to inspect.
 * @returns The unique marker paths in sorted order. An empty array is returned
 *   when the file cannot be read or unzipped; individual entries that fail to
 *   decode are skipped.
 */
// eslint-disable-next-line sonarjs/cognitive-complexity
async function extractMarkersFromDocx(templateSource: File): Promise<string[]> {
  try {
    const buffer = await templateSource.arrayBuffer();

    // eslint-disable-next-line sonarjs/no-unsafe-unzip
    const archive = await loadAsync(buffer);

    const found = new Set<string>();

    for (const [entryName, zipEntry] of Object.entries(archive.files)) {
      const isWordXml = entryName.startsWith('word/') && /\.xml$/i.test(entryName);
      if (!isWordXml) continue;

      let content: string;
      try {
        content = await zipEntry.async('string');
      } catch {
        // An unreadable entry is ignored; the remaining entries are still scanned.
        continue;
      }

      for (const match of content.matchAll(/{([^{]+?)}/g)) {
        const captured = match[1];
        if (!captured) continue;

        const normalized = normalizePath(captured);
        if (normalized.startsWith('d.')) {
          // Drop the two-character "d." prefix that was just checked for.
          found.add(normalized.slice(2));
        }
      }
    }

    return Array.from(found)
      .filter((marker) => Boolean(marker))
      .sort();
  } catch {
    // Best effort: any failure to read or unzip the file yields no markers.
    return [];
  }
}
| 68 | + |
/**
 * File picker that lists the markers found in a selected `.docx` file.
 *
 * When a file is chosen it is passed to `extractMarkersFromDocx` and the
 * returned marker paths are rendered as a bullet list.
 */
const DocxExtract: React.FC = () => {
  const [params, setParams] = useState<string[]>([]);
  // Counter identifying the most recent extraction request. Extraction is
  // asynchronous, so an earlier (slower) request may finish after a later one;
  // its result must not replace the list shown for the newer file.
  const latestRequest = React.useRef(0);

  const handleFile = async (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    if (!file) return;

    latestRequest.current += 1;
    const requestId = latestRequest.current;

    const markers = await extractMarkersFromDocx(file);

    // Ignore the result if another file was selected while this one was processing.
    if (requestId !== latestRequest.current) return;

    setParams(markers);
  };

  return (
    <>
      <input type='file' accept='.docx' onChange={handleFile} />
      <br />
      <ul>
        {/* Markers are unique strings, so the value itself is a stable key. */}
        {params.map((p) => (
          <li key={p}>{p}</li>
        ))}
      </ul>
    </>
  );
};
| 92 | + |
| 93 | +export default DocxExtract; |
0 commit comments