@@ -4611,12 +4611,20 @@ Removes unsafe and undesired HTML from the passed content
46114611=cut
46124612
46134613my $SCRUBBER ;
4614+ my $RESTRICTIVE_SCRUBBER ;
sub ScrubHTML {
    my ( $Content, %args ) = @_;

    # Undefined content is scrubbed as the empty string.
    $Content = '' unless defined $Content;

    # Two scrubbers exist: the default one and, when the caller passes
    # Restrictive => 1, one built with _NewScrubber( Restrictive => 1 ).
    # Each is constructed on first use and then kept in its own lexical
    # ($SCRUBBER / $RESTRICTIVE_SCRUBBER) for later calls.
    my $scrubber
        = $args{Restrictive}
        ? ( $RESTRICTIVE_SCRUBBER ||= _NewScrubber( Restrictive => 1 ) )
        : ( $SCRUBBER             ||= _NewScrubber() );

    return $scrubber->scrub($Content);
}
46214629
46224630=head2 _NewScrubber
@@ -4698,7 +4706,45 @@ if (RT->Config->Get('ShowTransactionImages') or RT->Config->Get('ShowRemoteImage
46984706 $SCRUBBER_RULES {' img' }-> {' src' } = join " |" , @src ;
46994707}
47004708
# Extra HTML::Scrubber rules layered on top of %SCRUBBER_RULES when a
# restrictive scrubber is requested. Only the <a> tag is overridden: it keeps
# every attribute rule from %SCRUBBER_ALLOWED_ATTRIBUTES but replaces the
# "href" rule with a callback that only lets through links whose host is
# RT's own host (from WebBaseURL) or matches an entry in RestrictLinkDomains.
our %RESTRICTIVE_SCRUBBER_RULES = (
    a => {
        %SCRUBBER_ALLOWED_ATTRIBUTES,

        # Callback arguments are ( scrubber, tag name, attribute name,
        # attribute value ). Returns the href to keep it, or '' to blank it.
        href => sub {
            my ( $self, $tag, $attr, $href ) = @_;

            # Empty/false values are passed back untouched.
            return $href unless $href;

            # Allow internal RT macros like __WebPath__, etc.: anything
            # without a "scheme:" prefix that still satisfies the regular
            # href rule.
            return $href
                if $href !~ /^\w+:/
                && $href =~ $SCRUBBER_ALLOWED_ATTRIBUTES{'href'};

            # Links whose URI class has no host (or an empty one) cannot be
            # checked against the allowed domains, so they are dropped.
            my $uri = URI->new($href);
            unless ( $uri->can("host") && $uri->host ) {
                RT->Logger->warning("Unknown link: $href");
                return '';
            }

            my $rt_host = RT::Interface::Web::_NormalizeHost( RT->Config->Get('WebBaseURL') )->host;
            my $host    = lc $uri->host;

            for my $allowed_domain ( $rt_host, @{ RT->Config->Get('RestrictLinkDomains') || [] } ) {
                if ( $allowed_domain =~ /\*/ ) {

                    # Turn a literal * into a domain component or partial
                    # component match. The -1 LIMIT keeps trailing empty
                    # fields so a wildcard at the very end of the pattern
                    # (e.g. "example.*") is not silently discarded.
                    my $regex = join '[a-zA-Z0-9-]*',
                        map { quotemeta($_) } split /\*/, $allowed_domain, -1;

                    # \A and \z anchor the whole host; unlike $, \z does not
                    # tolerate a trailing newline.
                    return $href if $host =~ /\A$regex\z/i;
                }
                else {
                    return $href if $host eq lc($allowed_domain);
                }
            }

            RT->Logger->warning("Blocked link: $href");
            return '';
        },
    },
);
4745+
47014746sub _NewScrubber {
4747+ my %args = @_ ;
47024748 require HTML::Scrubber;
47034749 my $scrubber = HTML::Scrubber-> new();
47044750
@@ -4726,7 +4772,7 @@ sub _NewScrubber {
47264772 );
47274773 $scrubber -> deny(qw[ *] );
47284774 $scrubber -> allow(@SCRUBBER_ALLOWED_TAGS );
4729- $scrubber -> rules(%SCRUBBER_RULES );
4775+ $scrubber -> rules( %SCRUBBER_RULES , $args { Restrictive } ? %RESTRICTIVE_SCRUBBER_RULES : () );
47304776
47314777 # Scrubbing comments is vital since IE conditional comments can contain
47324778 # arbitrary HTML and we'd pass it right on through.
0 commit comments