|
| 1 | +<?php /* |
| 2 | ++----------------------------------------------------------------------+ |
| 3 | +| Copyright (c) 1997-2025 The PHP Group | |
| 4 | ++----------------------------------------------------------------------+ |
| 5 | +| This source file is subject to version 3.01 of the PHP license, | |
| 6 | +| that is bundled with this package in the file LICENSE, and is | |
| 7 | +| available through the world-wide-web at the following url: | |
| 8 | +| https://www.php.net/license/3_01.txt. | |
| 9 | +| If you did not receive a copy of the PHP license and are unable to | |
| 10 | +| obtain it through the world-wide-web, please send a note to | |
| 11 | +| license@php.net, so we can mail you a copy immediately. | |
| 12 | ++----------------------------------------------------------------------+ |
| 13 | +| Authors: André L F S Bacci <ae php.net> | |
| 14 | ++----------------------------------------------------------------------+ |
| 15 | +
|
| 16 | +# Description |
| 17 | +
|
| 18 | +This command line utility tests whether a file is a valid standalone XML |
| 19 | +file, accepting undefined entity references. If a directory is given, |
| 20 | +the test is applied to all .xml files in it and in its subdirectories. |
| 21 | +
|
| 22 | +This tool also honors directories marked with .xmlfragmentdir, so that |
| 23 | +the files in them are tested with the relaxed semantics of XML fragments. */ |
| 24 | + |
// Surface every diagnostic: this is a developer-facing command line tool.
error_reporting( E_ALL );
ini_set( 'display_startup_errors' , 1 );
ini_set( 'display_errors' , 1 );

// At least one path is required besides the script name itself.
if ( count( $argv ) < 2 )
{
    print_usage_exit( $argv[0] );
}

// Check every path given on the command line, skipping the script name.
foreach( array_slice( $argv , 1 ) as $path )
{
    if ( ! file_exists( $path ) )
    {
        echo "Path does not exist: $path\n";
        continue;
    }

    if ( is_file( $path ) )
    {
        testFile( $path );
    }
    if ( is_dir( $path ) )
    {
        testDir( $path );
    }
}
| 45 | + |
/**
 * Prints the usage message to STDERR and terminates the script.
 *
 * The script exits with a non-zero status so that shells and build scripts
 * can tell that the tool was invoked incorrectly.
 *
 * @param string $cmd Name of the script as invoked, shown in the usage line.
 */
function print_usage_exit( $cmd )
{
    fwrite( STDERR , " Wrong parameter count. Usage:\n" );
    fwrite( STDERR , " {$cmd} path:\n" );
    exit( 1 );
}
| 52 | + |
/**
 * Learns the wording the installed libxml uses for undefined-entity errors.
 *
 * Two tiny probe documents that reference an undefined entity are parsed and
 * the first error message of each is captured, so callers can recognise
 * these messages without hard-coding libxml's text.
 *
 * @param string $prefix Receives the message text that precedes the entity name.
 * @param string $suffix Receives the message text that follows the entity name.
 * @param string $extra  Receives the message reported for an entity reference
 *                       placed after the root element; left empty if libxml
 *                       reports nothing for that probe.
 *
 * @throws RuntimeException If the "inside" probe yields no error message
 *                          containing the entity name. Continuing would leave
 *                          $prefix and $suffix empty, which match every
 *                          message and would hide all real errors.
 */
function setup( string & $prefix , string & $suffix , string & $extra )
{
    // Undefined entities generate TWO different error messages on libxml
    // - "Entity '?' not defined" (for entity inside elements)
    // - "Extra content at the end of the document" (entity outside elements)

    $marker  = "ZZZ";
    $inside  = "<x>&{$marker};</x>";
    $outside = "<x/>&{$marker};";

    $doc = new DOMDocument();
    $doc->recover = true;
    $doc->resolveExternals = false;
    $doc->substituteEntities = false;
    libxml_use_internal_errors( true );

    // Parses one probe and returns its first error message, or "" if none.
    // The error buffer is cleared before and after, so index 0 always
    // belongs to this probe and nothing leaks to later parses.
    $firstMessage = static function ( string $xml ) use ( $doc ) : string
    {
        libxml_clear_errors();
        $doc->loadXML( $xml );
        $errors = libxml_get_errors();
        libxml_clear_errors();

        return $errors !== [] ? trim( $errors[0]->message ) : "";
    };

    $parts = explode( $marker , $firstMessage( $inside ) );
    if ( count( $parts ) < 2 )
        throw new RuntimeException( "Unable to detect the libxml message for undefined entities." );

    [ $prefix , $suffix ] = $parts;

    $extra = $firstMessage( $outside );
}
| 78 | + |
/**
 * Checks one file for XML well-formedness and prints the first real problem.
 *
 * Errors about undefined entities inside elements are ignored. Any other
 * error is reported on standard output and checking of the file stops there.
 * Nothing is printed for a file without reportable errors.
 *
 * @param string $filename Path of the file to check.
 * @param bool   $fragment When true, the contents are wrapped in a synthetic
 *                         <f> root element before parsing, so files without
 *                         a single root element are accepted.
 */
function testFile( string $filename , bool $fragment = false )
{
    // The libxml message patterns are detected once and kept across calls.
    static $prefix = "", $suffix = "", $extra = "";
    if ( $extra === "" )
        setup( $prefix , $suffix , $extra );

    $contents = file_get_contents( $filename );

    // An unreadable file must be reported: wrapped as a fragment it would
    // silently pass, and unwrapped it would reach loadXML() as an empty string.
    if ( $contents === false )
    {
        echo "Broken XML file:\n";
        echo " Path: $filename\n";
        echo " Error: Unable to read file.\n";
        echo "\n";
        return;
    }

    if ( $fragment )
    {
        // The wrapper stays on the first line, so reported line numbers are
        // unchanged; only first-line columns include the "<f>" characters.
        $contents = "<f>{$contents}</f>";
    }
    elseif ( $contents === "" )
    {
        // DOMDocument::loadXML() throws ValueError on an empty string,
        // which would abort the whole run instead of flagging this file.
        echo "Broken XML file:\n";
        echo " Path: $filename\n";
        echo " Error: File is empty.\n";
        echo "\n";
        return;
    }

    $doc = new DOMDocument();
    $doc->recover = true;
    $doc->resolveExternals = false;
    $doc->substituteEntities = false;
    libxml_use_internal_errors( true );

    $doc->loadXML( $contents );

    $errors = libxml_get_errors();
    libxml_clear_errors();

    foreach( $errors as $error )
    {
        $message = trim( $error->message );

        // Undefined entity inside an element: expected, not a problem.
        if ( str_starts_with( $message , $prefix ) && str_ends_with( $message , $suffix ) )
            continue;

        // The "extra content" message is deliberately NOT skipped: it means
        // an entity reference is used at an unusual position, so it is
        // reported together with a hint about fragment directories.
        $hintFragDir = ( $message === $extra );

        $lin = $error->line;
        $col = $error->column;
        echo "Broken XML file:\n";
        echo " Path: $filename [$lin,$col]\n";
        echo " Error: $message\n";
        if ( $hintFragDir )
            echo " Hint: Dir is marked with .xmlfragmentdir on doc-en? If not, check entity references.\n";
        echo "\n";

        // Only the first reportable error of each file is shown.
        return;
    }
}
| 122 | + |
/**
 * Recursively checks every .xml file below a directory.
 *
 * Entries whose name starts with a dot are skipped. If the directory
 * contains a file named .xmlfragmentdir, all .xml files directly inside it
 * are checked as XML fragments. The marker is not passed down to
 * subdirectories; each one is inspected for its own marker.
 *
 * @param string $dir Directory to scan.
 */
function testDir( string $dir )
{
    $path  = realpath( $dir );
    $files = $path === false ? false : scandir( $path );

    if ( $files === false )
    {
        echo "Unable to read directory: $dir\n";
        return;
    }

    // Decide fragment mode before visiting any file. scandir() sorts names,
    // and names starting with characters that sort before "." (such as "-"
    // or "#") would otherwise be tested before the marker is seen.
    $fragment = in_array( ".xmlfragmentdir" , $files , true );
    $subdirs  = [];

    foreach( $files as $file )
    {
        // Skips ".", "..", the fragment marker and other dot entries.
        if ( $file[0] === "." )
            continue;

        $fullpath = realpath( "$path/$file" );

        // Entries that cannot be resolved (e.g. broken links) are ignored.
        if ( $fullpath === false )
            continue;

        if ( is_dir( $fullpath ) )
        {
            // Subdirectories are visited after this directory's own files.
            $subdirs[] = $fullpath;
            continue;
        }

        if ( str_ends_with( $fullpath , ".xml" ) )
            testFile( $fullpath , $fragment );
    }

    foreach( $subdirs as $subdir )
        testDir( $subdir );
}
0 commit comments