Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/HTMLPurifier.includes.php
Original file line number Diff line number Diff line change
Expand Up @@ -232,5 +232,6 @@
require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php';
require 'HTMLPurifier/URIScheme/tel.php';
require 'HTMLPurifier/URIScheme/sms.php';
require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php';
1 change: 1 addition & 0 deletions library/HTMLPurifier.safe-includes.php
Original file line number Diff line number Diff line change
Expand Up @@ -226,5 +226,6 @@
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
require_once $__dir . '/HTMLPurifier/URIScheme/sms.php';
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';
2 changes: 1 addition & 1 deletion library/HTMLPurifier/ConfigSchema/schema.ser

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ array (
'nntp' => true,
'news' => true,
'tel' => true,
'sms' => true,
)
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
Expand Down
9 changes: 9 additions & 0 deletions library/HTMLPurifier/URIParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ public function parse($uri)
$query = !empty($matches[6]) ? $matches[7] : null;
$fragment = !empty($matches[8]) ? $matches[9] : null;

// Special handling for SMS URIs with &body= syntax (non-standard but common)
// Split &body= from path into query for proper parsing
// This satisfies URIParserTest which expects &body= to be in query
if ($scheme === 'sms' && $authority === null && strpos($path, '&body=') !== false && $query === null) {
$parts = explode('&body=', $path, 2);
$path = $parts[0];
$query = 'body=' . (isset($parts[1]) ? $parts[1] : '');
}

// further parse authority
if ($authority !== null) {
$r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
Expand Down
118 changes: 118 additions & 0 deletions library/HTMLPurifier/URIScheme/sms.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
<?php

/**
 * Validates sms (for text messaging).
 *
 * The relevant specification for this protocol is RFC 5724.
 * This class normalizes SMS numbers so that they only include
 * digits, optionally with a leading plus for international numbers.
 * A comma-separated recipient list (allowed by RFC 5724) is normalized
 * one number at a time, so the individual recipients are kept apart.
 *
 * According to RFC 5724, SMS URIs support the 'body' parameter
 * using the format: sms:number?body=message
 * However, the format: sms:number&body=message is commonly used on
 * the web, so it is also supported here. Whichever form is received,
 * the validated URI carries the body in the path (number&body=message)
 * and has no query.
 */

class HTMLPurifier_URIScheme_sms extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = false;

    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Normalizes an sms: URI in place.
     *
     * Userinfo, host, port and query are always cleared. The path is
     * rebuilt as the normalized recipient list, followed by
     * "&body=<message>" when a body parameter was supplied (even an
     * empty one). Every parameter other than "body" is discarded.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true; malformed parts are stripped, not rejected.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;

        // Non-standard "number&name=value&..." syntax: everything before
        // the first '&' is the recipient list, the rest are parameters.
        $path_segments = explode('&', $uri->path);
        $recipients = array_shift($path_segments);
        $body = $this->findBody($path_segments);

        // Standard "?body=" syntax. A body found in the query takes
        // precedence over one found in the path.
        if (!is_null($uri->query)) {
            $query_body = $this->findBody(explode('&', $uri->query));
            if ($query_body !== null) {
                $body = $query_body;
            }
        }

        $uri->path = $this->normalizeRecipients($recipients);
        if ($body !== null) {
            // An empty body is still emitted as "&body=".
            $uri->path .= '&body=' . $this->sanitizeBody($body);
        }

        // The body now lives in the path, so the query must not repeat it.
        $uri->query = null;

        return true;
    }

    /**
     * Returns the value of the first "body" parameter in a list of
     * "name=value" strings.
     *
     * @param string[] $params
     * @return string|null The raw value, or null if there is no body parameter.
     */
    private function findBody($params)
    {
        foreach ($params as $param) {
            $pair = explode('=', $param, 2);
            if (count($pair) === 2 && $pair[0] === 'body') {
                return $pair[1];
            }
        }
        return null;
    }

    /**
     * Reduces each comma-separated recipient to digits with an optional
     * leading plus sign. Recipients that end up empty are dropped.
     *
     * @param string $recipients Possibly percent-encoded recipient list.
     * @return string
     */
    private function normalizeRecipients($recipients)
    {
        $numbers = array();
        foreach (explode(',', rawurldecode($recipients)) as $recipient) {
            // Strip everything except digits; a '+' survives only as
            // the very first character of a recipient.
            $number = preg_replace('/(?!^\+)[^\d]/', '', $recipient);
            if ($number !== '') {
                $numbers[] = $number;
            }
        }
        return implode(',', $numbers);
    }

    /**
     * Makes SMS body content safe to embed in the URI.
     *
     * HTML-significant characters are percent-encoded rather than
     * removed, so the message text is preserved. No keyword filtering
     * is done: deleting words such as "script" or "alert" mangles
     * ordinary messages and gives no protection once the delimiters
     * are encoded.
     *
     * @param string $body
     * @return string
     */
    private function sanitizeBody($body)
    {
        return strtr(
            $body,
            array(
                '<' => '%3C',
                '>' => '%3E',
                '"' => '%22',
                "'" => '%27',
            )
        );
    }
}
4 changes: 4 additions & 0 deletions tests/HTMLPurifier/AttrDef/URITest.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ public function testIntegration()
$this->assertDef('tel:+15555555555');
$this->assertDef('tel:+15555 555 555', 'tel:+15555555555');
$this->assertDef('tel:+15555%20555%20555', 'tel:+15555555555');
$this->assertDef('sms:+15555555555');
$this->assertDef('sms:+15555 555 555', 'sms:+15555555555');
$this->assertDef('sms:+15555%20555%20555', 'sms:+15555555555');
$this->assertDef('sms:5555&body=HOME', 'sms:5555&body=HOME');
}

public function testIntegrationWithPercentEncoder()
Expand Down
8 changes: 8 additions & 0 deletions tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ public function testPreserveAltSchemeWithTel()
$this->assertFiltering('tel:+15555%20555%20555');
}

/**
 * Runs each sms: URI through assertFiltering with its default expectation.
 */
public function testPreserveAltSchemeWithSms()
{
    $sms_uris = array(
        'sms:+15555555555',
        'sms:+15555 555 555',
        'sms:+15555%20555%20555',
        'sms:5555&body=HOME',
    );
    foreach ($sms_uris as $sms_uri) {
        $this->assertFiltering($sms_uri);
    }
}

public function testFilterIgnoreHTTPSpecialCase()
{
$this->assertFiltering('http:/', 'http://example.com/');
Expand Down
22 changes: 22 additions & 0 deletions tests/HTMLPurifier/URIParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,28 @@ public function testTelURI()
);
}

/**
 * Parsing of sms: URIs: the number ends up in the path, and a trailing
 * "&body=" part is expected in the query.
 */
public function testSmsURI()
{
    // Each case: input URI, expected path, expected query.
    $cases = array(
        array('sms:+1 (555) 555-5555', '+1 (555) 555-5555', null),
        array('sms:+1%20(555)%20555-5555', '+1%20(555)%20555-5555', null),
        array('sms:5555&body=HOME', '5555', 'body=HOME'),
    );
    foreach ($cases as $case) {
        list($input, $path, $query) = $case;
        $this->assertParsing($input, 'sms', null, null, null, $path, $query, null);
    }
}

public function testIPv4Address()
{
$this->assertParsing(
Expand Down
85 changes: 85 additions & 0 deletions tests/HTMLPurifier/URISchemeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ protected function assertValidation($uri, $expect_uri = true)
// convenience hack: the scheme should be explicitly specified
$scheme = $uri->getSchemeObj($this->config, $this->context);
$result = $scheme->validate($uri, $this->config, $this->context);

// Also validate the expected URI so it matches the validator's output format
if ($expect_uri !== false && $expect_uri !== true) {
$expect_scheme = $expect_uri->getSchemeObj($this->config, $this->context);
if ($expect_scheme) {
$expect_scheme->validate($expect_uri, $this->config, $this->context);
}
}

$this->assertEitherFailOrIdentical($result, $uri, $expect_uri);
}

Expand Down Expand Up @@ -215,6 +224,82 @@ public function test_tel_strip_letters()
);
}

/**
 * Spaces, parentheses and dashes in the number are expected to be dropped.
 */
public function test_sms_strip_punctuation()
{
    $formatted = 'sms:+1 (555) 555-5555';
    $normalized = 'sms:+15555555555';
    $this->assertValidation($formatted, $normalized);
}

/**
 * Percent-encoded spaces in the number are expected to be dropped too.
 */
public function test_sms_with_url_encoding()
{
    $encoded = 'sms:+1%20(555)%20555-5555';
    $normalized = 'sms:+15555555555';
    $this->assertValidation($encoded, $normalized);
}

/**
 * A number that is already in normalized form, checked with
 * assertValidation's default expectation.
 */
public function test_sms_regular()
{
    $canonical = 'sms:+15555555555';
    $this->assertValidation($canonical);
}

/**
 * A number without a leading plus keeps only its digits.
 */
public function test_sms_no_plus()
{
    $dashed = 'sms:555-555-5555';
    $digits_only = 'sms:5555555555';
    $this->assertValidation($dashed, $digits_only);
}

/**
 * Letters in the number are expected to be removed.
 */
public function test_sms_strip_letters()
{
    $with_letters = 'sms:abcd1234';
    $digits_only = 'sms:1234';
    $this->assertValidation($with_letters, $digits_only);
}

/**
 * A body parameter survives validation, whether it was given in the
 * common "&body=" form or in the standard "?body=" query form. Both
 * are expected to come out in the "&body=" form.
 */
public function test_sms_with_body_query()
{
    // Non-standard but common form.
    $this->assertValidation(
        'sms:5555&body=HOME',
        'sms:5555&body=HOME'
    );
    // Standard RFC 5724 form, previously not covered by any test.
    $this->assertValidation(
        'sms:5555?body=HOME',
        'sms:5555&body=HOME'
    );
}

/**
 * A "subject" parameter after the body is expected to be dropped.
 */
public function test_sms_strip_invalid_params()
{
    $with_subject = 'sms:+15555555555&body=Hello&subject=Test';
    $body_only = 'sms:+15555555555&body=Hello';
    $this->assertValidation($with_subject, $body_only);
}

/**
 * A percent-encoded space inside the body is expected to stay encoded.
 */
public function test_sms_with_url_encoded_body()
{
    $encoded_body = 'sms:5555&body=Hello%20World';
    $this->assertValidation($encoded_body, $encoded_body);
}

/**
 * Markup in the body is expected to leave nothing but an empty body.
 */
public function test_sms_strip_dangerous_query_params()
{
    $hostile = 'sms:5555&body=<script>alert("xss")</script>&subject=Test';
    $empty_body = 'sms:5555&body=';
    $this->assertValidation($hostile, $empty_body);
}

/**
 * Parameters other than the body ("invalid", "subject") are expected
 * to be dropped.
 */
public function test_sms_strip_invalid_query_params()
{
    $with_extras = 'sms:5555&body=HOME&invalid=param&subject=Test';
    $body_only = 'sms:5555&body=HOME';
    $this->assertValidation($with_extras, $body_only);
}

public function test_data_png()
{
$this->assertValidation(
Expand Down
Loading