|
/**
 * Inline SVG markup (16×16, `aria-hidden`, filled with `currentColor`)
 * rendered inside the "Generate suggestions" button.
 * The markup is a trusted constant, so it is safe to interpolate into HTML.
 */
const icon = `<svg width="16" height="16" class="Draftail-Icon" aria-hidden="true" viewBox="0 0 576 512" fill="currentColor"><path d="M234.7 42.7L197 56.8c-3 1.1-5 4-5 7.2s2 6.1 5 7.2l37.7 14.1L248.8 123c1.1 3 4 5 7.2 5s6.1-2 7.2-5l14.1-37.7L315 71.2c3-1.1 5-4 5-7.2s-2-6.1-5-7.2L277.3 42.7 263.2 5c-1.1-3-4-5-7.2-5s-6.1 2-7.2 5L234.7 42.7zM46.1 395.4c-18.7 18.7-18.7 49.1 0 67.9l34.6 34.6c18.7 18.7 49.1 18.7 67.9 0L529.9 116.5c18.7-18.7 18.7-49.1 0-67.9L495.3 14.1c-18.7-18.7-49.1-18.7-67.9 0L46.1 395.4zM484.6 82.6l-105 105-23.3-23.3 105-105 23.3 23.3zM7.5 117.2C3 118.9 0 123.2 0 128s3 9.1 7.5 10.8L64 160l21.2 56.5c1.7 4.5 6 7.5 10.8 7.5s9.1-3 10.8-7.5L128 160l56.5-21.2c4.5-1.7 7.5-6 7.5-10.8s-3-9.1-7.5-10.8L128 96 106.8 39.5C105.1 35 100.8 32 96 32s-9.1 3-10.8 7.5L64 96 7.5 117.2zm352 256c-4.5 1.7-7.5 6-7.5 10.8s3 9.1 7.5 10.8L416 416l21.2 56.5c1.7 4.5 6 7.5 10.8 7.5s9.1-3 10.8-7.5L480 416l56.5-21.2c4.5-1.7 7.5-6 7.5-10.8s-3-9.1-7.5-10.8L480 352l-21.2-56.5c-1.7-4.5-6-7.5-10.8-7.5s-9.1 3-10.8 7.5L416 352l-56.5 21.2z"></path></svg>`;
| 2 | + |
/**
 * Stimulus controller that generates caption / alt text suggestions for the
 * image found inside its element (`img[data-chooser-image]`).
 *
 * On connect it renders a "Generate suggestions" button (unless a `suggest`
 * target already exists) and an output area. Clicking the button runs an
 * image-to-text model and, when the `contextual` value is set, rewrites the
 * caption with a text-to-text model using the text of the surrounding form
 * fields. Each suggestion gets a "Use" button that copies it into the
 * caption input.
 */
class AltTextController extends window.StimulusModule.Controller {
  static targets = ['suggest'];
  static values = {
    // CSS selector (resolved inside this.element) of the input holding the chosen image.
    imageInput: { default: '', type: String },
    // CSS selector (resolved inside this.element) of the input receiving the chosen suggestion.
    captionInput: { default: '', type: String },
    // When true, suggestions are rewritten using the surrounding form text.
    contextual: { default: false, type: Boolean },
  };

  /**
   * Promise of an image-to-text pipeline, shared between all instances of this controller.
   * @type {Promise<Function>}
   */
  static captioner;
  /**
   * Promise of a text-to-text pipeline for enhancing captions,
   * shared between all instances of this controller.
   * @type {Promise<Function>}
   */
  static text2text;
  /**
   * Stylesheet for the suggestion boxes, created on first use and shared so
   * that it is adopted by the document only once.
   * @type {CSSStyleSheet | undefined}
   */
  static #suggestionStyles;

  static {
    const transformers = import(
      'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
    );
    const load = (task, model) => {
      const loading = transformers.then(({ pipeline }) =>
        pipeline(task, model),
      );
      // Report load failures once, up front. Awaiting `loading` later still
      // rejects, so callers can handle the error themselves.
      loading.catch((error) => {
        console.error(`Failed to load the ${task} pipeline:`, error);
      });
      return loading;
    };
    // Assign the promises synchronously so that awaiting them always waits
    // for the model, even if generate() runs before the import has resolved.
    this.captioner = load('image-to-text', 'Mozilla/distilvit');
    this.text2text = load(
      'text2text-generation',
      'Xenova/LaMini-Flan-T5-783M',
    );
  }

  /**
   * Convert an array of input elements into a single string,
   * concatenating their values or inner text. Blank entries are skipped.
   * @param {Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>} [inputs]
   * @returns {string} The concatenated text from the inputs, separated by blank lines
   */
  static inputsToText = (inputs = []) =>
    inputs
      .map((input) => input.value || input.innerText || '')
      .filter((text) => text.trim() !== '')
      .join('\n\n');

  /**
   * Adopt the suggestion stylesheet into the document, creating it on first use.
   * Safe to call repeatedly: the sheet is never adopted twice.
   */
  static #adoptSuggestionStyles() {
    if (!AltTextController.#suggestionStyles) {
      const sheet = new CSSStyleSheet();
      sheet.replaceSync(/* css */ `
        .suggestion {
          display: block;
          margin-top: 0.5rem;
          margin-bottom: 0.5rem;
          border-radius: 0.25rem;
          padding: 0.5rem;
          background-color: lightblue;
          color: black;
        }
      `);
      AltTextController.#suggestionStyles = sheet;
    }
    const sheet = AltTextController.#suggestionStyles;
    if (!document.adoptedStyleSheets.includes(sheet)) {
      document.adoptedStyleSheets.push(sheet);
    }
  }

  /**
   * URL of the currently displayed image, or an empty string if there is none.
   * @returns {string}
   */
  get imageURL() {
    return this.element.querySelector('img[data-chooser-image]')?.src || '';
  }

  // Override only for JSDoc/typing purposes, not for functionality
  /** @returns {HTMLElement} */
  get element() {
    return super.element;
  }

  /**
   * All text inputs in the caption input's form, excluding the caption input itself.
   * Empty when there is no caption input or it does not belong to a form.
   * @returns {Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>}
   */
  get textInputs() {
    const form = this.captionInput?.form;
    if (!form) return [];
    return [
      ...form.querySelectorAll(
        'input[type="text"], textarea, [role="textbox"]',
      ),
    ].filter((input) => input !== this.captionInput);
  }

  /**
   * Text inputs in the form, grouped by their position
   * relative to the caption input (before/after).
   * Both groups are always present, possibly empty.
   * @returns {{
   *   before: Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>,
   *   after: Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>
   * }}
   */
  get textInputsContext() {
    const { captionInput } = this;
    const grouped = Object.groupBy(this.textInputs, (element) =>
      captionInput.compareDocumentPosition(element) &
      Node.DOCUMENT_POSITION_PRECEDING
        ? 'before'
        : 'after',
    );
    // Object.groupBy omits keys that received no elements, so supply defaults.
    return { before: [], after: [], ...grouped };
  }

  /**
   * Text of the form fields located before and after the caption input.
   * @returns {{ before: string, after: string }}
   */
  get textContext() {
    const { inputsToText } = AltTextController;
    const { before, after } = this.textInputsContext;
    return {
      before: inputsToText(before),
      after: inputsToText(after),
    };
  }

  /** Stimulus lifecycle callback: bind handlers and render the button and output area. */
  connect() {
    // generate() calls caption / contextualCaption through a plain variable,
    // so they must be bound to keep `this`.
    this.generate = this.generate.bind(this);
    this.caption = this.caption.bind(this);
    this.contextualCaption = this.contextualCaption.bind(this);
    this.renderFurniture();
  }

  /** Resolve the image input from its selector whenever the value changes. */
  imageInputValueChanged() {
    this.imageInput = this.imageInputValue
      ? this.element.querySelector(this.imageInputValue)
      : null;
    if (this.hasSuggestTarget) this.toggleSuggestTarget();
  }

  /** Resolve the caption input from its selector whenever the value changes. */
  captionInputValueChanged() {
    this.captionInput = this.captionInputValue
      ? this.element.querySelector(this.captionInputValue)
      : null;
  }

  /**
   * Enable the suggest button only while the image input has a value.
   * @param {Event} [event] When given, events not coming from the image input are ignored.
   */
  toggleSuggestTarget(event) {
    if (event?.target && event.target !== this.imageInput) return;
    if (!this.hasSuggestTarget) return;
    this.suggestTarget.disabled = !this.imageInput?.value;
  }

  /** Render the suggest button and the output area, then sync the button state. */
  renderFurniture() {
    this.renderSuggestButton();
    this.renderOutputArea();
    this.toggleSuggestTarget();
  }

  /** Append the "Generate suggestions" button unless a suggest target already exists. */
  renderSuggestButton() {
    if (this.hasSuggestTarget) return;
    // Derive the button id from the nearest element with an id; fall back to
    // a fixed prefix rather than throwing when there is none.
    const prefix = this.element.closest('[id]')?.id ?? 'alt-text';
    const buttonId = `${prefix}-generate`;
    const button = /* html */ `
      <button
        id="${buttonId}"
        type="button"
        data-alt-text-target="suggest"
        data-action="alt-text#generate"
        class="button button-secondary"
      >
        ${icon}

        <span>Generate suggestions</span>
      </button>
    `;
    this.element.insertAdjacentHTML('beforeend', button);
  }

  /**
   * Ensure the suggestion stylesheet is adopted and that this controller has
   * exactly one output area attached to its element.
   */
  renderOutputArea() {
    AltTextController.#adoptSuggestionStyles();
    // Reuse the existing area so repeated connects do not create duplicates.
    this.outputArea ??= document.createElement('div');
    if (!this.element.contains(this.outputArea)) {
      this.element.append(this.outputArea);
    }
  }

  /**
   * Append one suggestion, with a "Use" button, to the output area.
   * @param {string} suggestion Model-generated text; inserted as plain text, never as HTML.
   */
  renderSuggestion(suggestion) {
    const wrapper = document.createElement('div');
    wrapper.className = 'suggestion';

    const output = document.createElement('output');
    // `for` is only valid on <output>, so it is set here instead of on the wrapper.
    output.setAttribute('for', this.suggestTarget.id);
    // textContent keeps any markup in the model output inert.
    output.textContent = suggestion;

    const useButton = document.createElement('button');
    useButton.className = 'button button-small';
    useButton.type = 'button';
    useButton.dataset.action = 'alt-text#useSuggestion';
    useButton.textContent = 'Use';

    // The space keeps a gap between the two inline elements.
    wrapper.append(output, ' ', useButton);
    this.outputArea.append(wrapper);
  }

  /**
   * Copy the suggestion belonging to the clicked "Use" button into the caption input.
   * @param {Event} event Click event from a "Use" button
   */
  useSuggestion(event) {
    if (!this.captionInput) return;
    const output = event.target
      .closest('.suggestion')
      ?.querySelector('output');
    if (!output) return;
    this.captionInput.value = output.textContent;
  }

  /**
   * Generate a caption for an image.
   * @param {string} imageURL
   * @returns {Promise<string>} Text generated by the image-to-text pipeline
   */
  async caption(imageURL) {
    const captioner = await AltTextController.captioner;
    return (await captioner(imageURL))[0].generated_text;
  }

  /**
   * Generate a caption for an image, then rewrite it using the text of the
   * form fields surrounding the caption input.
   * @param {string} imageURL
   * @returns {Promise<string>} Text generated by the text-to-text pipeline
   */
  async contextualCaption(imageURL) {
    const caption = await this.caption(imageURL);
    const text2text = await AltTextController.text2text;
    const { before, after } = this.textContext;

    // Enhance the caption to be more descriptive
    // using the text context from the form.
    // Built with join() so that code indentation cannot leak into the prompt.
    const prompt = [
      '',
      `system: Change the following caption to be more descriptive: "${caption}"`,
      '',
      `system: Given this content shown before the image: ${before}`,
      '',
      `system: And this content shown after the image: ${after}`,
    ].join('\n');
    return (await text2text(prompt))[0].generated_text;
  }

  /**
   * Clear previous suggestions and generate three new ones for the current
   * image. A failed suggestion is logged and does not affect the others.
   * @returns {Promise<void>}
   */
  async generate() {
    this.outputArea.replaceChildren(); // Clear previous output

    const url = this.imageURL;
    if (!url) return; // Nothing to caption

    const label = this.suggestTarget.lastElementChild;
    label.textContent = 'Generating…';
    this.suggestTarget.disabled = true;
    const method = this.contextualValue ? this.contextualCaption : this.caption;
    const suggestionCount = 3;

    try {
      await Promise.allSettled(
        Array.from({ length: suggestionCount }, () =>
          method(url)
            .then((output) => this.renderSuggestion(output))
            .catch((error) => {
              console.error('Error generating suggestion:', error);
            }),
        ),
      );
    } finally {
      // Always give the button back, whatever happened above.
      this.suggestTarget.disabled = false;
      label.textContent = 'Generate suggestions';
    }
  }
}
| 217 | + |
// Register the controller with Wagtail's Stimulus application under the
// `alt-text` identifier used in the data-action / data-*-target attributes.
window.wagtail.app.register('alt-text', AltTextController);
0 commit comments