Skip to content

Commit 48c2a7d

Browse files
Samiya Caur, Devtools-frontend LUCI CQ
authored and committed
[AiAssistance] Add UI elements for supporting screenshots as input
This CL covers the screenshot mechanism, how the screenshot is displayed in the input area, and how the user can remove it. Bug: 393036589 Change-Id: Ifc147f8ed2c960fb0fa5695b8a0f3c163a9a0670 Reviewed-on: https://chromium-review.googlesource.com/c/devtools/devtools-frontend/+/6225119 Commit-Queue: Samiya Caur <[email protected]> Auto-Submit: Samiya Caur <[email protected]> Reviewed-by: Alex Rudenko <[email protected]>
1 parent 10ef716 commit 48c2a7d

File tree

5 files changed

+239
-9
lines changed

5 files changed

+239
-9
lines changed

front_end/panels/ai_assistance/AiAssistancePanel.test.ts

Lines changed: 62 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ import * as Workspace from '../../models/workspace/workspace.js';
1111
import {createAiAssistancePanel, detachPanels, mockAidaClient} from '../../testing/AiAssistanceHelpers.js';
1212
import {findMenuItemWithLabel, getMenu} from '../../testing/ContextMenuHelpers.js';
1313
import {dispatchClickEvent} from '../../testing/DOMHelpers.js';
14-
import {describeWithEnvironment, registerNoopActions} from '../../testing/EnvironmentHelpers.js';
14+
import {createTarget, registerNoopActions} from '../../testing/EnvironmentHelpers.js';
15+
import {describeWithMockConnection} from '../../testing/MockConnection.js';
1516
import * as UI from '../../ui/legacy/legacy.js';
1617
import * as ElementsPanel from '../elements/elements.js';
1718
import * as NetworkPanel from '../network/network.js';
@@ -27,7 +28,7 @@ async function drainMicroTasks() {
2728
await new Promise(resolve => setTimeout(resolve, 0));
2829
}
2930

30-
describeWithEnvironment('AI Assistance Panel', () => {
31+
describeWithMockConnection('AI Assistance Panel', () => {
3132
beforeEach(() => {
3233
registerNoopActions(['elements.toggle-element-search']);
3334
UI.Context.Context.instance().setFlavor(ElementsPanel.ElementsPanel.ElementsPanel, null);
@@ -1092,4 +1093,63 @@ describeWithEnvironment('AI Assistance Panel', () => {
10921093
},
10931094
]);
10941095
});
1096+
1097+
describe('multimodal input', () => {
  /**
   * Creates a target and stubs `captureScreenshot` on its ScreenCaptureModel so
   * that it resolves to the placeholder string 'imageInput'.
   *
   * @returns the stub, so tests can verify that a capture was requested.
   */
  function mockScreenshotModel() {
    const target = createTarget();
    const screenCaptureModel = target.model(SDK.ScreenCaptureModel.ScreenCaptureModel);
    assert.exists(screenCaptureModel);
    return {
      captureScreenshotStub: sinon.stub(screenCaptureModel, 'captureScreenshot').resolves('imageInput'),
    };
  }

  /**
   * Sets the `devToolsFreestyler` host config (always enabled, with the given
   * `multimodal` flag), installs a stubbed ElementsPanel flavor and creates the
   * AI assistance panel.
   *
   * @returns the view stub whose calls carry the props rendered by the panel.
   */
  function createPanelView(multimodal: boolean) {
    Object.assign(Root.Runtime.hostConfig, {
      devToolsFreestyler: {
        enabled: true,
        multimodal,
      },
    });
    UI.Context.Context.instance().setFlavor(
        ElementsPanel.ElementsPanel.ElementsPanel,
        sinon.createStubInstance(ElementsPanel.ElementsPanel.ElementsPanel));
    const {view} = createAiAssistancePanel();
    return view;
  }

  it('multimodal related functions unavailable when multimodal is disabled', async () => {
    const view = createPanelView(false);

    assert.isFalse(view.lastCall.args[0].multimodalInputEnabled);
    assert.notExists(view.lastCall.args[0].onTakeScreenshot);
    assert.notExists(view.lastCall.args[0].onRemoveImageInput);
    assert.isEmpty(view.lastCall.args[0].imageInput);
  });

  it('adds an image input and then removes it', async () => {
    const {captureScreenshotStub} = mockScreenshotModel();
    const view = createPanelView(true);

    assert.isTrue(view.lastCall.args[0].multimodalInputEnabled);

    // `view.lastCall` is re-read after every action because each panel update
    // records a new call on the view stub.
    await view.lastCall.args[0].onTakeScreenshot?.();

    // A bare `expect(stub.calledOnce)` only builds an assertion object and never
    // checks anything; assert the call count explicitly.
    sinon.assert.calledOnce(captureScreenshotStub);
    assert.strictEqual(view.lastCall.args[0].imageInput, 'imageInput');

    await view.lastCall.args[0].onRemoveImageInput?.();

    assert.isEmpty(view.lastCall.args[0].imageInput);
  });
});
10951155
});

front_end/panels/ai_assistance/AiAssistancePanel.ts

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ import * as i18n from '../../core/i18n/i18n.js';
1010
import type * as Platform from '../../core/platform/platform.js';
1111
import * as Root from '../../core/root/root.js';
1212
import * as SDK from '../../core/sdk/sdk.js';
13+
import * as Protocol from '../../generated/protocol.js';
1314
import * as Workspace from '../../models/workspace/workspace.js';
1415
import * as UI from '../../ui/legacy/legacy.js';
1516
import * as Lit from '../../ui/lit/lit.js';
@@ -58,6 +59,7 @@ const {html} = Lit;
5859

5960
const AI_ASSISTANCE_SEND_FEEDBACK = 'https://crbug.com/364805393' as Platform.DevToolsPath.UrlString;
6061
const AI_ASSISTANCE_HELP = 'https://goo.gle/devtools-ai-assistance' as Platform.DevToolsPath.UrlString;
62+
const SCREENSHOT_QUALITY = 100;
6163

6264
const UIStrings = {
6365
/**
@@ -254,6 +256,7 @@ export class AiAssistancePanel extends UI.Panel.Panel {
254256
#project?: Workspace.Workspace.Project;
255257
#patchSuggestion?: string;
256258
#patchSuggestionLoading?: boolean;
259+
#imageInput: string = '';
257260

258261
constructor(private view: View = defaultView, {aidaClient, aidaAvailability, syncInfo}: {
259262
aidaClient: Host.AidaClient.AidaClient,
@@ -639,7 +642,11 @@ export class AiAssistancePanel extends UI.Panel.Panel {
639642
inspectElementToggled: this.#toggleSearchElementAction.toggled(),
640643
userInfo: this.#userInfo,
641644
canShowFeedbackForm: this.#serverSideLoggingEnabled,
645+
multimodalInputEnabled:
646+
isAiAssistanceMultimodalInputEnabled() && this.#currentAgent?.type === AgentType.STYLING,
647+
imageInput: this.#imageInput,
642648
onTextSubmit: (text: string) => {
649+
this.#imageInput = '';
643650
void this.#startConversation(text);
644651
Host.userMetrics.actionTaken(Host.UserMetrics.Action.AiAssistanceQuerySubmitted);
645652
},
@@ -651,6 +658,9 @@ export class AiAssistancePanel extends UI.Panel.Panel {
651658
onCancelCrossOriginChat: this.#blockedByCrossOrigin && this.#previousSameOriginContext ?
652659
this.#handleCrossOriginChatCancellation.bind(this) :
653660
undefined,
661+
onTakeScreenshot: isAiAssistanceMultimodalInputEnabled() ? this.#handleTakeScreenshot.bind(this) : undefined,
662+
onRemoveImageInput: isAiAssistanceMultimodalInputEnabled() ? this.#handleRemoveImageInput.bind(this) :
663+
undefined,
654664
onApplyToWorkspace: this.#onApplyToWorkspace.bind(this)
655665
},
656666
this.#viewOutput, this.#contentContainer);
@@ -834,6 +844,31 @@ export class AiAssistancePanel extends UI.Panel.Panel {
834844
}
835845
}
836846

847+
/**
 * Captures a JPEG screenshot of the primary page target (viewport mode, at
 * `SCREENSHOT_QUALITY`) and, when the capture yields a value, stores it as the
 * pending image input and re-renders the panel.
 *
 * @throws Error when there is no primary page target, or when that target has
 *     no ScreenCaptureModel.
 */
async #handleTakeScreenshot(): Promise<void> {
  const primaryTarget = SDK.TargetManager.TargetManager.instance().primaryPageTarget();
  if (!primaryTarget) {
    throw new Error('Could not find main target');
  }

  const screenCaptureModel = primaryTarget.model(SDK.ScreenCaptureModel.ScreenCaptureModel);
  if (!screenCaptureModel) {
    throw new Error('Could not find model');
  }

  const screenshot = await screenCaptureModel.captureScreenshot(
      Protocol.Page.CaptureScreenshotRequestFormat.Jpeg,
      SCREENSHOT_QUALITY,
      SDK.ScreenCaptureModel.ScreenshotMode.FROM_VIEWPORT,
  );
  // A falsy result leaves the current image input untouched and skips the re-render.
  if (!screenshot) {
    return;
  }

  this.#imageInput = screenshot;
  void this.doUpdate();
}
866+
867+
/**
 * Discards the pending image input by resetting it to the empty string, then
 * re-renders the panel.
 */
#handleRemoveImageInput(): void {
  this.#imageInput = '';
  // The update is fire-and-forget; its result is intentionally not awaited.
  void this.doUpdate();
}
871+
837872
#runAbortController = new AbortController();
838873
#cancel(): void {
839874
this.#runAbortController.abort();
@@ -1122,6 +1157,11 @@ export class ActionDelegate implements UI.ActionRegistration.ActionDelegate {
11221157
}
11231158
}
11241159

1160+
/**
 * Reports whether the host config turns on multimodal input for AI assistance.
 *
 * @returns `true` only when `hostConfig.devToolsFreestyler.multimodal` is
 *     truthy; a missing `devToolsFreestyler` entry yields `false`.
 */
function isAiAssistanceMultimodalInputEnabled(): boolean {
  const multimodal = Root.Runtime.hostConfig.devToolsFreestyler?.multimodal;
  return Boolean(multimodal);
}
1164+
11251165
function isAiAssistancePatchingEnabled(): boolean {
11261166
const {hostConfig} = Root.Runtime;
11271167
return Boolean(hostConfig.devToolsFreestyler?.patching);

front_end/panels/ai_assistance/components/ChatView.ts

Lines changed: 97 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,14 @@ const UIStringsNotTranslate = {
268268
*@description Button text for staging changes to workspace.
269269
*/
270270
applyToWorkspace: 'Apply to workspace',
271+
/**
272+
*@description Title for the take screenshot button.
273+
*/
274+
takeScreenshotButtonTitle: 'Take screenshot',
275+
/**
276+
*@description Title for the remove image input button.
277+
*/
278+
removeImageInputButtonTitle: 'Remove image input',
271279
};
272280

273281
const str_ = i18n.i18n.registerUIStrings('panels/ai_assistance/components/ChatView.ts', UIStrings);
@@ -324,6 +332,8 @@ export interface Props {
324332
onContextClick: () => void | Promise<void>;
325333
onNewConversation: () => void;
326334
onCancelCrossOriginChat?: () => void;
335+
onTakeScreenshot?: () => Promise<void>;
336+
onRemoveImageInput?: () => void;
327337
inspectElementToggled: boolean;
328338
state: State;
329339
aidaAvailability: Host.AidaClient.AidaAccessPreconditions;
@@ -340,6 +350,8 @@ export interface Props {
340350
patchSuggestion?: string;
341351
patchSuggestionLoading?: boolean;
342352
projectName?: string;
353+
multimodalInputEnabled?: boolean;
354+
imageInput?: string;
343355
onApplyToWorkspace?: () => void;
344356
}
345357

@@ -738,14 +750,18 @@ export class ChatView extends HTMLElement {
738750
state: this.#props.state,
739751
selectedContext: this.#props.selectedContext,
740752
inspectElementToggled: this.#props.inspectElementToggled,
753+
multimodalInputEnabled: this.#props.multimodalInputEnabled,
741754
agentType: this.#props.agentType,
755+
imageInput: this.#props.imageInput,
742756
onContextClick: this.#props.onContextClick,
743757
onInspectElementClick: this.#props.onInspectElementClick,
744758
onSubmit: this.#handleSubmit,
745759
onTextAreaKeyDown: this.#handleTextAreaKeyDown,
746760
onCancel: this.#handleCancel,
747761
onNewConversation: this.#props.onNewConversation,
748762
onCancelCrossOriginChat: this.#props.onCancelCrossOriginChat,
763+
onTakeScreenshot: this.#props.onTakeScreenshot,
764+
onRemoveImageInput: this.#props.onRemoveImageInput,
749765
})
750766
}
751767
</main>
@@ -1420,6 +1436,65 @@ function renderChatInputButtons(
14201436
></devtools-button>`;
14211437
}
14221438

1439+
/**
 * Renders the icon button that invokes `onTakeScreenshot` when clicked.
 *
 * @param multimodalInputEnabled when falsy, nothing is rendered.
 * @param isTextInputDisabled forwarded as the button's `disabled` state.
 * @param onTakeScreenshot click handler attached to the button.
 * @returns the button template, or `Lit.nothing` when multimodal input is off.
 */
function renderTakeScreenshotButton({
  multimodalInputEnabled,
  isTextInputDisabled,
  onTakeScreenshot,
}: {
  isTextInputDisabled: boolean,
  multimodalInputEnabled?: boolean,
  onTakeScreenshot?: () => Promise<void>,
}): Lit.LitTemplate {
  if (!multimodalInputEnabled) {
    return Lit.nothing;
  }

  // The same string serves as both the accessible label and the tooltip title.
  const buttonTitle = lockedString(UIStringsNotTranslate.takeScreenshotButtonTitle);
  const buttonData = {
    variant: Buttons.Button.Variant.ICON,
    size: Buttons.Button.Size.REGULAR,
    disabled: isTextInputDisabled,
    iconName: 'photo-camera',
    title: buttonTitle,
    jslogContext: 'take-screenshot',
  } as Buttons.Button.ButtonData;

  return html`<devtools-button
    class="chat-input-button"
    aria-label=${buttonTitle}
    @click=${onTakeScreenshot}
    .data=${buttonData}
  ></devtools-button>`;
}
1467+
1468+
/**
 * Renders a preview of the pending image input together with a button that
 * invokes `onRemoveImageInput`.
 *
 * @param multimodalInputEnabled when falsy, nothing is rendered.
 * @param imageInput image payload embedded into a `data:image/jpeg;base64,`
 *     URL; nothing is rendered when it is missing or empty.
 * @param onRemoveImageInput click handler attached to the remove button.
 * @returns the preview template, or `Lit.nothing` when there is nothing to show.
 */
function renderImageInput({
  multimodalInputEnabled,
  imageInput,
  onRemoveImageInput,
}: {
  multimodalInputEnabled?: boolean,
  imageInput?: string,
  onRemoveImageInput?: () => void,
}): Lit.LitTemplate {
  // An empty string is falsy, so one truthiness test covers both the
  // "undefined" and the "cleared" image input.
  const hasImageToShow = Boolean(multimodalInputEnabled && imageInput);
  if (!hasImageToShow) {
    return Lit.nothing;
  }

  // The same string serves as both the accessible label and the tooltip title.
  const removeTitle = lockedString(UIStringsNotTranslate.removeImageInputButtonTitle);
  const removeButtonData = {
    variant: Buttons.Button.Variant.ICON,
    size: Buttons.Button.Size.MICRO,
    iconName: 'cross',
    title: removeTitle,
  } as Buttons.Button.ButtonData;

  return html`
    <div class="image-input-container">
      <devtools-button
        aria-label=${removeTitle}
        @click=${onRemoveImageInput}
        .data=${removeButtonData}
      ></devtools-button>
      <img src="data:image/jpeg;base64, ${imageInput}" alt="Screenshot input" />
    </div>`;
}
1497+
14231498
function renderChatInput({
14241499
isLoading,
14251500
blockedByCrossOrigin,
@@ -1428,14 +1503,18 @@ function renderChatInput({
14281503
state,
14291504
selectedContext,
14301505
inspectElementToggled,
1506+
multimodalInputEnabled,
14311507
agentType,
1508+
imageInput,
14321509
onContextClick,
14331510
onInspectElementClick,
14341511
onSubmit,
14351512
onTextAreaKeyDown,
14361513
onCancel,
14371514
onNewConversation,
14381515
onCancelCrossOriginChat,
1516+
onTakeScreenshot,
1517+
onRemoveImageInput,
14391518
}: {
14401519
isLoading: boolean,
14411520
blockedByCrossOrigin: boolean,
@@ -1444,24 +1523,33 @@ function renderChatInput({
14441523
state: State,
14451524
selectedContext: ConversationContext<unknown> | null,
14461525
inspectElementToggled: boolean,
1526+
multimodalInputEnabled?: boolean,
14471527
agentType?: AgentType,
1528+
imageInput?: string,
14481529
onContextClick: () => void | Promise<void>,
14491530
onInspectElementClick: () => void,
14501531
onSubmit: (ev: SubmitEvent) => void,
14511532
onTextAreaKeyDown: (ev: KeyboardEvent) => void,
14521533
onCancel: (ev: SubmitEvent) => void,
14531534
onNewConversation: () => void,
14541535
onCancelCrossOriginChat?: () => void,
1536+
onTakeScreenshot?: () => Promise<void>,
1537+
onRemoveImageInput?: () => void,
14551538
}): Lit.LitTemplate {
14561539
if (!agentType) {
14571540
return Lit.nothing;
14581541
}
14591542

1460-
const cls = Lit.Directives.classMap({
1543+
const chatInputCls = Lit.Directives.classMap({
14611544
'chat-input': true,
14621545
'two-big-buttons': blockedByCrossOrigin,
14631546
});
14641547

1548+
const chatInputContainerCls = Lit.Directives.classMap({
1549+
'chat-input-container': true,
1550+
disabled: isTextInputDisabled,
1551+
});
1552+
14651553
// clang-format off
14661554
return html`
14671555
<form class="input-form" @submit=${onSubmit}>
@@ -1481,8 +1569,11 @@ function renderChatInput({
14811569
</div>
14821570
</div>
14831571
` : Lit.nothing}
1484-
<div class="chat-input-container">
1485-
<textarea class=${cls}
1572+
<div class=${chatInputContainerCls}>
1573+
${renderImageInput(
1574+
{multimodalInputEnabled, imageInput, onRemoveImageInput}
1575+
)}
1576+
<textarea class=${chatInputCls}
14861577
.disabled=${isTextInputDisabled}
14871578
wrap="hard"
14881579
maxlength="10000"
@@ -1491,6 +1582,9 @@ function renderChatInput({
14911582
jslog=${VisualLogging.textField('query').track({ keydown: 'Enter' })}
14921583
></textarea>
14931584
<div class="chat-input-buttons">
1585+
${renderTakeScreenshotButton({
1586+
multimodalInputEnabled, isTextInputDisabled, onTakeScreenshot
1587+
})}
14941588
${renderChatInputButtons({ isLoading, blockedByCrossOrigin, isTextInputDisabled, onCancel, onNewConversation, onCancelCrossOriginChat })}
14951589
</div>
14961590
</div>

0 commit comments

Comments
 (0)