Skip to content

Commit 0c6679f

Browse files
committed
feat: handle sms: scheme
1 parent 1eb05d9 commit 0c6679f

File tree

9 files changed

+213
-1
lines changed

9 files changed

+213
-1
lines changed

library/HTMLPurifier.includes.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,5 +232,6 @@
232232
require 'HTMLPurifier/URIScheme/news.php';
233233
require 'HTMLPurifier/URIScheme/nntp.php';
234234
require 'HTMLPurifier/URIScheme/tel.php';
235+
require 'HTMLPurifier/URIScheme/sms.php';
235236
require 'HTMLPurifier/VarParser/Flexible.php';
236237
require 'HTMLPurifier/VarParser/Native.php';

library/HTMLPurifier.safe-includes.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,5 +226,6 @@
226226
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
227227
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
228228
require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
229+
require_once $__dir . '/HTMLPurifier/URIScheme/sms.php';
229230
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
230231
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';

library/HTMLPurifier/ConfigSchema/schema.ser

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ array (
99
'nntp' => true,
1010
'news' => true,
1111
'tel' => true,
12+
'sms' => true,
1213
)
1314
--DESCRIPTION--
1415
Whitelist that defines the schemes that a URI is allowed to have. This
library/HTMLPurifier/URIScheme/sms.php

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
<?php
2+
3+
/**
 * Validates sms (for text messaging).
 *
 * The relevant specification for this protocol is RFC 5724.
 * This class normalizes SMS numbers so that they only include
 * digits, optionally with a leading plus for international numbers.
 *
 * According to RFC 5724, SMS URIs support the 'body' parameter
 * using the format: sms:number?body=message
 * However, the format: sms:number&body=message is commonly used on
 * the web, so it is also supported here.
 *
 * In both forms only the first 'body' field is kept; every other
 * field is discarded. The body value is normalized to canonical
 * percent-encoding rather than having characters or words deleted
 * from it, so the message text itself is never altered.
 */

class HTMLPurifier_URIScheme_sms extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = false;

    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Normalizes an sms: URI in place.
     *
     * Userinfo, host and port are always cleared. The path is reduced to
     * the cleaned phone number, optionally followed by '&body=<value>'
     * when the non-standard path form carried a non-empty body. The query
     * is reduced to 'body=<value>', or to null when it has no non-empty
     * body field.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true; unusable parts are stripped, not rejected.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;

        // Non-standard "number&name=value&..." form: everything from the
        // first '&' onwards is a parameter list, never part of the number.
        // Cutting here keeps digits inside parameters from leaking into
        // the phone number.
        $path = (string)$uri->path;
        $amp = strpos($path, '&');
        if ($amp === false) {
            $uri->path = $this->normalizeNumber($path);
        } else {
            $number = $this->normalizeNumber(substr($path, 0, $amp));
            $body = $this->extractBody(substr($path, $amp + 1));
            // Compare against '' explicitly: empty() would also discard
            // the perfectly valid message "0".
            $uri->path = ($body === '') ? $number : $number . '&body=' . $body;
        }

        // Standard "?body=..." form.
        if ($uri->query !== null) {
            $body = $this->extractBody($uri->query);
            $uri->query = ($body === '') ? null : 'body=' . $body;
        }

        return true;
    }

    /**
     * Reduces a phone number to digits with an optional leading plus.
     * @param string $number Raw, possibly percent-encoded number
     * @return string
     */
    private function normalizeNumber($number)
    {
        // The lookahead protects a '+' only when it is the very first
        // character; every other non-digit is removed.
        return preg_replace(
            '/(?!^\+)[^\d]/',
            '',
            rawurldecode((string)$number)
        );
    }

    /**
     * Returns the sanitized value of the first 'body' field in an
     * '&'-separated field list.
     * @param string $fields e.g. 'body=Hello&subject=Test'
     * @return string Sanitized body, or '' when there is no body field
     */
    private function extractBody($fields)
    {
        foreach (explode('&', (string)$fields) as $field) {
            if (strncmp($field, 'body=', 5) === 0) {
                // substr() returns false instead of '' on older PHP
                // versions when the field is exactly 'body='.
                return $this->sanitizeBody((string)substr($field, 5));
            }
        }
        return '';
    }

    /**
     * Sanitizes SMS body content.
     *
     * The value is decoded and re-encoded so that every character outside
     * the unreserved set ends up percent-encoded. This neutralizes quotes,
     * angle brackets and URI delimiters without deleting anything from the
     * message; a keyword blacklist would both corrupt ordinary words and be
     * trivially bypassed.
     *
     * @param string $body
     * @return string
     */
    private function sanitizeBody($body)
    {
        return rawurlencode(rawurldecode((string)$body));
    }
}

tests/HTMLPurifier/AttrDef/URITest.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ public function testIntegration()
2525
$this->assertDef('tel:+15555555555');
2626
$this->assertDef('tel:+15555 555 555', 'tel:+15555555555');
2727
$this->assertDef('tel:+15555%20555%20555', 'tel:+15555555555');
28+
$this->assertDef('sms:+15555555555');
29+
$this->assertDef('sms:+15555 555 555', 'sms:+15555555555');
30+
$this->assertDef('sms:+15555%20555%20555', 'sms:+15555555555');
31+
$this->assertDef('sms:5555&body=HOME', 'sms:5555&body=HOME');
2832
}
2933

3034
public function testIntegrationWithPercentEncoder()

tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ public function testPreserveAltSchemeWithTel()
4444
$this->assertFiltering('tel:+15555%20555%20555');
4545
}
4646

47+
/**
 * Runs each sms: URI through assertFiltering() with no separate
 * expectation argument, mirroring the tel: test above it.
 */
public function testPreserveAltSchemeWithSms()
{
    $uris = array(
        'sms:+15555555555',
        'sms:+15555 555 555',
        'sms:+15555%20555%20555',
        'sms:5555&body=HOME',
    );
    foreach ($uris as $uri) {
        $this->assertFiltering($uri);
    }
}
54+
4755
public function testFilterIgnoreHTTPSpecialCase()
4856
{
4957
$this->assertFiltering('http:/', 'http://example.com/');

tests/HTMLPurifier/URIParserTest.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,28 @@ public function testTelURI()
8181
);
8282
}
8383

84+
/**
 * Parsing must split off the scheme and leave the rest of an sms: URI
 * untouched in the path component; no normalization happens here.
 * Argument order after the input is: scheme, userinfo, host, port,
 * path, query, fragment.
 */
public function testSmsURI()
{
    $this->assertParsing(
        'sms:+1 (555) 555-5555',
        'sms', null, null, null, '+1 (555) 555-5555', null, null
    );
    $this->assertParsing(
        'sms:+1%20(555)%20555-5555',
        'sms', null, null, null, '+1%20(555)%20555-5555', null, null
    );
    // There is no '?' in this URI, so there is no query component:
    // '&body=HOME' stays in the path, which is where the sms scheme
    // validator looks for it.
    $this->assertParsing(
        'sms:5555&body=HOME',
        'sms', null, null, null, '5555&body=HOME', null, null
    );
}
105+
84106
public function testIPv4Address()
85107
{
86108
$this->assertParsing(

tests/HTMLPurifier/URISchemeTest.php

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,82 @@ public function test_tel_strip_letters()
215215
);
216216
}
217217

218+
/**
 * Spaces, parentheses and dashes are removed; the leading plus stays.
 */
public function test_sms_strip_punctuation()
{
    $input = 'sms:+1 (555) 555-5555';
    $expected = 'sms:+15555555555';
    $this->assertValidation($input, $expected);
}
224+
225+
/**
 * Percent-encoded spaces in the number are removed like literal ones.
 */
public function test_sms_with_url_encoding()
{
    $input = 'sms:+1%20(555)%20555-5555';
    $expected = 'sms:+15555555555';
    $this->assertValidation($input, $expected);
}
231+
232+
/**
 * A plain international number, asserted without a separate
 * expectation argument.
 */
public function test_sms_regular()
{
    $this->assertValidation('sms:+15555555555');
}
238+
239+
/**
 * A number without a leading plus only loses its dashes.
 */
public function test_sms_no_plus()
{
    $input = 'sms:555-555-5555';
    $expected = 'sms:5555555555';
    $this->assertValidation($input, $expected);
}
245+
246+
/**
 * Letters are not valid in the number and are removed.
 */
public function test_sms_strip_letters()
{
    $input = 'sms:abcd1234';
    $expected = 'sms:1234';
    $this->assertValidation($input, $expected);
}
253+
254+
/**
 * A simple body given in the '&body=' form is expected to come back
 * unchanged. (Despite the method name, the input has no '?' query.)
 */
public function test_sms_with_body_query()
{
    $input = 'sms:5555&body=HOME';
    $expected = 'sms:5555&body=HOME';
    $this->assertValidation($input, $expected);
}
261+
262+
/**
 * A parameter following the body ('subject') is expected to be removed,
 * leaving only the number and the body.
 */
public function test_sms_strip_invalid_params()
{
    $input = 'sms:+15555555555&body=Hello&subject=Test';
    $expected = 'sms:+15555555555&body=Hello';
    $this->assertValidation($input, $expected);
}
269+
270+
/**
 * A percent-encoded space inside the body is expected to be preserved.
 */
public function test_sms_with_url_encoded_body()
{
    $input = 'sms:5555&body=Hello%20World';
    $expected = 'sms:5555&body=Hello%20World';
    $this->assertValidation($input, $expected);
}
277+
278+
/**
 * Markup in the body must not survive validation.
 *
 * The scheme validator drops an empty body together with its '&body='
 * marker, so a result ending in '&body=' can never be produced; once the
 * markup is gone only the number is expected to remain.
 * NOTE(review): this assumes the URI parser stops the path at the first
 * '<', so the validator sees an empty body - confirm against the parser.
 */
public function test_sms_strip_dangerous_query_params()
{
    $input = 'sms:5555&body=<script>alert("xss")</script>&subject=Test';
    $expected = 'sms:5555';
    $this->assertValidation($input, $expected);
}
285+
286+
/**
 * Several parameters following the body are all expected to be removed.
 */
public function test_sms_strip_invalid_query_params()
{
    $input = 'sms:5555&body=HOME&invalid=param&subject=Test';
    $expected = 'sms:5555&body=HOME';
    $this->assertValidation($input, $expected);
}
293+
218294
public function test_data_png()
219295
{
220296
$this->assertValidation(

0 commit comments

Comments
 (0)