Skip to content

Commit 72df2b5

Browse files
authored
Merge pull request #94 from netresearch/fix/review-fixes
fix: remove redundant htmlspecialchars from JSON API responses
2 parents bdffb66 + 077c932 commit 72df2b5

File tree

7 files changed

+100
-142
lines changed

7 files changed

+100
-142
lines changed

Classes/Controller/AjaxController.php

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939
* Provides backend API endpoints for chat and completion requests,
4040
* routing them through the centralized LlmServiceManager.
4141
*
42-
* JSON responses carry raw data; HTML escaping is the frontend's responsibility.
42+
* Returns raw data in JSON responses — no server-side HTML escaping.
43+
* The frontend sanitizes content via DOMParser before DOM insertion.
4344
*/
4445
#[AsController]
4546
final readonly class AjaxController
@@ -140,9 +141,9 @@ public function chatAction(ServerRequestInterface $request): ResponseInterface
140141

141142
return $this->jsonResponseWithRateLimitHeaders([
142143
'success' => true,
143-
'content' => htmlspecialchars($response->content ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
144-
'model' => htmlspecialchars($response->model ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
145-
'finishReason' => htmlspecialchars($response->finishReason ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
144+
'content' => $response->content ?? '',
145+
'model' => $response->model ?? '',
146+
'finishReason' => $response->finishReason ?? '',
146147
], $rateLimitResult);
147148
} catch (ProviderException $e) {
148149
$this->logger->error('Chat provider error', ['exception' => $e->getMessage()]);
@@ -313,13 +314,13 @@ public function streamAction(ServerRequestInterface $request): ResponseInterface
313314
$generator = $this->llmServiceManager->streamChatWithConfiguration($messages, $configuration);
314315

315316
foreach ($generator as $chunk) {
316-
$chunks[] = 'data: ' . json_encode(['content' => htmlspecialchars($chunk, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')], JSON_THROW_ON_ERROR) . "\n\n";
317+
$chunks[] = 'data: ' . json_encode(['content' => $chunk], JSON_THROW_ON_ERROR) . "\n\n";
317318
}
318319

319320
// Add final "done" event
320321
$chunks[] = 'data: ' . json_encode([
321322
'done' => true,
322-
'model' => htmlspecialchars($configuration->getModelId(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
323+
'model' => $configuration->getModelId(),
323324
], JSON_THROW_ON_ERROR) . "\n\n";
324325

325326
$body = implode('', $chunks);
@@ -408,9 +409,9 @@ public function getTasksAction(ServerRequestInterface $request): ResponseInterfa
408409

409410
$list[] = [
410411
'uid' => $task->getUid(),
411-
'identifier' => htmlspecialchars($task->getIdentifier(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
412-
'name' => htmlspecialchars($task->getName(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
413-
'description' => htmlspecialchars($task->getDescription(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
412+
'identifier' => $task->getIdentifier(),
413+
'name' => $task->getName(),
414+
'description' => $task->getDescription(),
414415
];
415416
}
416417

Classes/Domain/DTO/CompleteResponse.php

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
/**
1616
* Response DTO for completion AJAX endpoint.
1717
*
18-
* All LLM output fields are HTML-escaped server-side to prevent XSS.
19-
* The frontend decodes entities where raw HTML is needed (e.g. CKEditor insertion).
18+
* Returns raw data in JSON responses — no server-side HTML escaping.
19+
* The frontend sanitizes content via DOMParser before DOM insertion.
2020
*
2121
* @internal
2222
*/
@@ -35,15 +35,16 @@ private function __construct(
3535
/**
3636
* Create a successful response from nr-llm CompletionResponse.
3737
*
38-
* All string fields are HTML-escaped to prevent XSS when rendered in the browser.
38+
* Returns raw content without server-side HTML escaping.
39+
* The frontend sanitizes content via DOMParser before DOM insertion.
3940
*/
4041
public static function success(CompletionResponse $response): self
4142
{
4243
return new self(
4344
success: true,
44-
content: htmlspecialchars($response->content ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
45-
model: htmlspecialchars($response->model ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
46-
finishReason: htmlspecialchars($response->finishReason ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
45+
content: $response->content ?? '',
46+
model: $response->model ?? '',
47+
finishReason: $response->finishReason ?? '',
4748
usage: UsageData::fromUsageStatistics($response->usage),
4849
error: null,
4950
retryAfter: null,

Resources/Public/JavaScript/Ckeditor/CowriterDialog.js

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,12 @@ export class CowriterDialog {
251251
);
252252

253253
if (result.success && result.content) {
254-
// Server returns HTML-escaped content for XSS safety;
255-
// decode entities to recover the original HTML for CKEditor.
256-
const decoded = this._decodeEntities(result.content);
257254
preview.replaceChildren();
258-
const safeBody = this._sanitizeHtml(decoded);
255+
const safeBody = this._sanitizeHtml(result.content);
259256
while (safeBody.firstChild) {
260257
preview.appendChild(safeBody.firstChild);
261258
}
262-
resultContent = decoded;
259+
resultContent = result.content;
263260
state = 'result';
264261

265262
if (result.model) {
@@ -541,23 +538,6 @@ export class CowriterDialog {
541538
return row;
542539
}
543540

544-
/**
545-
* Decode HTML entities back to their original characters.
546-
*
547-
* The server HTML-escapes all LLM output for XSS safety.
548-
* This reverses the escaping so the content can be used as HTML in CKEditor.
549-
* Uses a textarea element which safely decodes entities without executing scripts.
550-
*
551-
* @param {string} escaped
552-
* @returns {string} The decoded HTML string
553-
* @private
554-
*/
555-
_decodeEntities(escaped) {
556-
const textarea = document.createElement('textarea');
557-
textarea.innerHTML = escaped;
558-
return textarea.value;
559-
}
560-
561541
/**
562542
* Parse HTML string safely via DOMParser and remove dangerous elements/attributes.
563543
*

Tests/E2E/CowriterWorkflowTest.php

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ public function completeWorkflowWithLongContent(): void
166166

167167
#[Test]
168168
#[DataProvider('xssPayloadProvider')]
169-
public function completeWorkflowEscapesXssFromLlm(string $maliciousContent, string $description): void
169+
public function completeWorkflowReturnsRawLlmContent(string $maliciousContent, string $description): void
170170
{
171171
// Arrange: LLM returns malicious content (simulating prompt injection attack)
172172
$llmResponse = $this->createOpenAiResponse(
@@ -183,15 +183,12 @@ public function completeWorkflowEscapesXssFromLlm(string $maliciousContent, stri
183183
$request = $this->createJsonRequest(['prompt' => 'Improve this text']);
184184
$result = $stack['controller']->completeAction($request);
185185

186-
// Assert: XSS content is escaped through the entire stack
186+
// Assert: Raw content returned in JSON (JSON encoding is inherently XSS-safe;
187+
// frontend sanitizes via DOMParser before DOM insertion)
187188
$data = json_decode((string) $result->getBody(), true, 512, JSON_THROW_ON_ERROR);
188189
self::assertIsArray($data);
189190
self::assertTrue($data['success']);
190-
191-
// Verify HTML entities are used, not raw tags
192-
self::assertStringNotContainsString('<', $data['content'], "Raw < found in: {$description}");
193-
self::assertStringNotContainsString('>', $data['content'], "Raw > found in: {$description}");
194-
self::assertStringContainsString('&lt;', $data['content'], "Missing escaped < in: {$description}");
191+
self::assertSame($maliciousContent, $data['content'], "Content mismatch for: {$description}");
195192
}
196193

197194
/**
@@ -226,9 +223,9 @@ public static function xssPayloadProvider(): iterable
226223
}
227224

228225
#[Test]
229-
public function completeWorkflowEscapesXssInModelName(): void
226+
public function completeWorkflowReturnsRawModelName(): void
230227
{
231-
// Arrange: LLM returns XSS in model field (edge case)
228+
// Arrange: LLM returns special chars in model field
232229
$llmResponse = new CompletionResponse(
233230
content: 'Normal response',
234231
model: '<script>alert("xss")</script>',
@@ -246,11 +243,10 @@ public function completeWorkflowEscapesXssInModelName(): void
246243
$request = $this->createJsonRequest(['prompt' => 'Test']);
247244
$result = $stack['controller']->completeAction($request);
248245

249-
// Assert: Model field is also escaped
246+
// Assert: Raw content in JSON (JSON encoding is inherently safe)
250247
$data = json_decode((string) $result->getBody(), true, 512, JSON_THROW_ON_ERROR);
251248
self::assertIsArray($data);
252-
self::assertStringNotContainsString('<script>', $data['model']);
253-
self::assertStringContainsString('&lt;script&gt;', $data['model']);
249+
self::assertSame('<script>alert("xss")</script>', $data['model']);
254250
}
255251

256252
// =========================================================================
@@ -360,12 +356,12 @@ public function chatWorkflowEscapesXssInAllFields(): void
360356
]);
361357
$result = $stack['controller']->chatAction($request);
362358

363-
// Assert: All fields escaped
359+
// Assert: Raw content returned — frontend sanitizes before DOM insertion
364360
$data = json_decode((string) $result->getBody(), true, 512, JSON_THROW_ON_ERROR);
365361
self::assertIsArray($data);
366-
self::assertStringNotContainsString('<', $data['content']);
367-
self::assertStringNotContainsString('<', $data['model']);
368-
self::assertStringNotContainsString('<', $data['finishReason']);
362+
self::assertSame('<script>steal()</script>', $data['content']);
363+
self::assertSame('<img onerror=alert(1)>', $data['model']);
364+
self::assertSame('<svg onload=hack()>', $data['finishReason']);
369365
}
370366

371367
// =========================================================================
@@ -813,7 +809,7 @@ public function streamWorkflowReturnsChunkedSseResponse(): void
813809
}
814810

815811
#[Test]
816-
public function streamWorkflowEscapesXssInChunks(): void
812+
public function streamWorkflowReturnsRawContentInChunks(): void
817813
{
818814
$stack = $this->createStreamingStack(['<script>alert(1)</script>']);
819815
$config = $this->createLlmConfiguration();
@@ -825,17 +821,16 @@ public function streamWorkflowEscapesXssInChunks(): void
825821
$events = $this->parseSseEvents((string) $result->getBody());
826822
self::assertCount(2, $events); // 1 content + 1 done
827823

828-
// XSS content must be escaped
829-
self::assertStringNotContainsString('<script>', $events[0]['content']);
830-
self::assertStringContainsString('&lt;script&gt;', $events[0]['content']);
824+
// Raw content in JSON-encoded SSE data (JSON encoding is inherently safe)
825+
self::assertSame('<script>alert(1)</script>', $events[0]['content']);
831826
}
832827

833828
#[Test]
834-
public function streamWorkflowEscapesXssInModelName(): void
829+
public function streamWorkflowReturnsRawModelName(): void
835830
{
836831
$stack = $this->createStreamingStack(['Hello']);
837832

838-
// Create config with XSS in model name
833+
// Create config with special chars in model name
839834
$config = self::createStub(\Netresearch\NrLlm\Domain\Model\LlmConfiguration::class);
840835
$config->method('getIdentifier')->willReturn('test');
841836
$config->method('getName')->willReturn('Test');
@@ -849,8 +844,7 @@ public function streamWorkflowEscapesXssInModelName(): void
849844
$events = $this->parseSseEvents((string) $result->getBody());
850845
$doneEvent = end($events);
851846
self::assertTrue($doneEvent['done']);
852-
self::assertStringNotContainsString('<img', $doneEvent['model']);
853-
self::assertStringContainsString('&lt;img', $doneEvent['model']);
847+
self::assertSame('<img onerror=alert(1)>', $doneEvent['model']);
854848
}
855849

856850
#[Test]
@@ -999,7 +993,7 @@ public function getTasksWorkflowReturnsActiveTasks(): void
999993
}
1000994

1001995
#[Test]
1002-
public function getTasksWorkflowEscapesXss(): void
996+
public function getTasksWorkflowReturnsRawContent(): void
1003997
{
1004998
$stack = $this->createCompleteStack([]);
1005999

@@ -1020,9 +1014,10 @@ public function getTasksWorkflowEscapesXss(): void
10201014
self::assertIsArray($data);
10211015
self::assertTrue($data['success']);
10221016
self::assertCount(1, $data['tasks']);
1023-
self::assertStringNotContainsString('<script>', $data['tasks'][0]['identifier']);
1024-
self::assertStringNotContainsString('<img', $data['tasks'][0]['name']);
1025-
self::assertStringNotContainsString('<svg', $data['tasks'][0]['description']);
1017+
// Raw content — JSON encoding is the transport safety, frontend sanitizes
1018+
self::assertSame('<script>alert(1)</script>', $data['tasks'][0]['identifier']);
1019+
self::assertSame('<img onerror=hack>', $data['tasks'][0]['name']);
1020+
self::assertSame('<svg onload=steal()>', $data['tasks'][0]['description']);
10261021
}
10271022

10281023
#[Test]
@@ -1226,7 +1221,7 @@ public function executeTaskWorkflowWithAdHocRules(): void
12261221
}
12271222

12281223
#[Test]
1229-
public function executeTaskWorkflowEscapesXssFromLlm(): void
1224+
public function executeTaskWorkflowReturnsRawLlmContent(): void
12301225
{
12311226
$llmResponse = $this->createOpenAiResponse(
12321227
content: '<script>document.cookie</script>Malicious response',
@@ -1249,11 +1244,12 @@ public function executeTaskWorkflowEscapesXssFromLlm(): void
12491244
]);
12501245
$result = $stack['controller']->executeTaskAction($request);
12511246

1247+
// Raw content in JSON (JSON encoding is inherently safe;
1248+
// frontend sanitizes via DOMParser before DOM insertion)
12521249
$data = json_decode((string) $result->getBody(), true, 512, JSON_THROW_ON_ERROR);
12531250
self::assertIsArray($data);
12541251
self::assertTrue($data['success']);
1255-
self::assertStringNotContainsString('<script>', $data['content']);
1256-
self::assertStringContainsString('&lt;script&gt;', $data['content']);
1252+
self::assertSame('<script>document.cookie</script>Malicious response', $data['content']);
12571253
}
12581254

12591255
#[Test]

Tests/Integration/Controller/AjaxControllerIntegrationTest.php

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,9 @@ public function completeFlowWithModelPrefix(): void
168168

169169
#[Test]
170170
#[DataProvider('xssPayloadProvider')]
171-
public function completeActionEscapesXssPayloadsInContent(string $payload, string $description): void
171+
public function completeActionReturnsRawContentForFrontendSanitization(string $payload, string $description): void
172172
{
173-
// Arrange: LLM returns malicious content
173+
// Arrange: LLM returns potentially dangerous content
174174
$config = $this->createLlmConfiguration();
175175
$this->configRepoMock->method('findDefault')->willReturn($config);
176176

@@ -181,14 +181,10 @@ public function completeActionEscapesXssPayloadsInContent(string $payload, strin
181181
$request = $this->createJsonRequest(['prompt' => 'Test prompt']);
182182
$result = $this->subject->completeAction($request);
183183

184-
// Assert: XSS payload is HTML-escaped (angle brackets become entities)
184+
// Assert: Raw content returned — JSON encoding is the transport safety,
185+
// frontend sanitizes via DOMParser before DOM insertion
185186
$data = $this->assertSuccessfulJsonResponse($result);
186-
// Check that < and > are escaped to &lt; and &gt;
187-
self::assertStringNotContainsString('<', $data['content'], "Unescaped < in: {$description}");
188-
self::assertStringNotContainsString('>', $data['content'], "Unescaped > in: {$description}");
189-
// Verify the escaped versions are present
190-
self::assertStringContainsString('&lt;', $data['content'], "Missing escaped < in: {$description}");
191-
self::assertStringContainsString('&gt;', $data['content'], "Missing escaped > in: {$description}");
187+
self::assertSame($payload, $data['content'], "Content should be raw for: {$description}");
192188
}
193189

194190
/**
@@ -291,13 +287,13 @@ public function chatFlowWithMultipleMessages(): void
291287
}
292288

293289
#[Test]
294-
public function chatActionEscapesXssInAllFields(): void
290+
public function chatActionReturnsRawContentInAllFields(): void
295291
{
296292
// Arrange: Setup configuration
297293
$config = $this->createLlmConfiguration();
298294
$this->configRepoMock->method('findDefault')->willReturn($config);
299295

300-
// Arrange: Response with XSS in all fields
296+
// Arrange: Response with HTML in all fields
301297
$response = new CompletionResponse(
302298
content: '<script>alert(1)</script>',
303299
model: '<img onerror=alert(2)>',
@@ -313,12 +309,12 @@ public function chatActionEscapesXssInAllFields(): void
313309
]);
314310
$result = $this->subject->chatAction($request);
315311

316-
// Assert: All fields are escaped (< and > become &lt; and &gt;)
312+
// Assert: Raw content returned — frontend sanitizes before DOM insertion
317313
$data = json_decode((string) $result->getBody(), true, 512, JSON_THROW_ON_ERROR);
318314
self::assertIsArray($data);
319-
self::assertStringNotContainsString('<', $data['content']);
320-
self::assertStringNotContainsString('<', $data['model']);
321-
self::assertStringNotContainsString('<', $data['finishReason']);
315+
self::assertSame('<script>alert(1)</script>', $data['content']);
316+
self::assertSame('<img onerror=alert(2)>', $data['model']);
317+
self::assertSame('<svg onload=alert(3)>', $data['finishReason']);
322318
}
323319

324320
// =========================================================================

0 commit comments

Comments
 (0)