Skip to content

Commit 6f7fa02

Browse files
author
André L F S Bacci
committed
Infrastructure for individual entity files
1 parent a5da02b commit 6f7fa02

File tree

3 files changed

+84
-48
lines changed

3 files changed

+84
-48
lines changed

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
.manual.xml
33
.entities.ent
44
.revcheck.json
5-
install-unix.xml
6-
install-win.xml
75
manual.xml
86
version.xml
97
sources.xml

configure.php

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -739,11 +739,12 @@ function getFileModificationHistory(): array {
739739
globbetyglob("{$ac['basedir']}/scripts", 'make_scripts_executable');
740740

741741
{
742-
$cmd = escapeshellarg( $ac['PHP'] );
743-
$cmd .= ' ' . escapeshellarg( __DIR__ . '/scripts/entities.php' );
744-
$cmd .= ' ' . escapeshellarg( $ac['ROOTDIR'] . '/en/entities' );
742+
$cmd[] = escapeshellarg( $ac['PHP'] );
743+
$cmd[] = escapeshellarg( __DIR__ . '/scripts/entities.php' );
744+
$cmd[] = escapeshellarg( $ac['ROOTDIR'] . '/en/entities' );
745745
if ( $ac['LANG'] != 'en' )
746-
$cmd .= ' ' . escapeshellarg( $ac['ROOTDIR'] . '/' . $ac['LANG'] . '/entities' );
746+
$cmd[] = escapeshellarg( $ac['ROOTDIR'] . '/' . $ac['LANG'] . '/entities' );
747+
$cmd = implode( ' ' , $cmd );
747748
passthru( $cmd );
748749
}
749750

scripts/entities.php

Lines changed: 79 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@
3333
return;
3434
}
3535

36-
$generate = realpath( __DIR__ . "/../.entities.ent" ); // sibling of .manual.xml
36+
$filename = __DIR__ . "/../.entities.ent"; // sibling of .manual.xml
37+
touch( $filename ); // empty file, at minimum, and
38+
$filename = realpath( $filename ); // realpath() fails if file not exists.
39+
3740
$entities = []; // all entities, already overridden
3841
$expected = []; // entities that are expected to be overridden (translations)
3942
$override = []; // override statistics
@@ -48,16 +51,14 @@
4851
$langs[] = $argv[$idx];
4952

5053
if ( $detail )
51-
print "Creating file $generate in verbose detail mode...\n";
54+
print "Creating file $filename in verbose detail mode...\n";
5255
else
53-
print "Creating file $generate...";
56+
print "Creating file $filename...";
5457

5558
for ( $run = 0 ; $run < count( $langs) ; $run++ )
5659
parseDir( $langs[$run] , $run > 0 );
5760

58-
dump( $entities );
59-
60-
[$count, $untranslated, $overriden] = verifyOverrides( $detail );
61+
dump( $filename , $entities );
6162

6263
if ( $detail )
6364
{
@@ -66,8 +67,9 @@
6667
else
6768
{
6869
echo " done";
69-
if ( $untranslated + $overriden > 0 )
70-
echo ": $count entities, $untranslated untranslated, $overriden orerriden";
70+
[$all, $unt, $over] = verifyOverrides( $detail );
71+
if ( $unt + $over > 0 )
72+
echo ": $all entities, $unt untranslated, $over orerriden";
7173
echo ".\n";
7274
}
7375
exit;
@@ -92,52 +94,59 @@ function parseDir( string $dir , bool $expectedOverride )
9294
continue;
9395

9496
$text = file_get_contents( $path );
95-
96-
if ( validate( $path , $text , false ) )
97-
{
98-
push( $path , $text , $expectedOverride );
99-
continue;
100-
}
101-
102-
$frag = "<frag>$text</frag>";
103-
if ( validate( $path , $frag , true ) )
104-
{
105-
push( $path , $text , $expectedOverride );
106-
continue;
107-
}
97+
validateStore( $path , $text , $expectedOverride );
10898
}
10999
}
110100

111-
function validate( string $path , string $text , bool $warn ) : bool
101+
function validateStore( string $path , string $text , bool $expectedOverride )
112102
{
103+
$trim = trim( $text );
104+
if ( strlen( $trim ) == 0 )
105+
{
106+
// Yes, there are empty entities; they are valid entities, but not valid XML.
107+
// see: en/language-snippets.ent mongodb.note.queryable-encryption-preview
108+
push( $path , $text , $expectedOverride , true );
109+
return;
110+
}
111+
112+
$frag = "<root>$text</root>";
113+
113114
$dom = new DOMDocument( '1.0' , 'utf8' );
114115
$dom->recover = true;
115116
$dom->resolveExternals = false;
116117
libxml_use_internal_errors( true );
117118

118-
$res = $dom->loadXML( $text );
119+
$res = $dom->loadXML( $frag );
119120

120121
$err = libxml_get_errors();
121122
libxml_clear_errors();
122123

123124
foreach( $err as $item )
124125
{
125-
$msg = $item->message;
126+
$msg = trim( $item->message );
126127
if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) )
127128
continue;
128129

129-
if ( $warn )
130-
{
131-
fwrite( STDERR , "\n XML load failed on entity file." );
132-
fwrite( STDERR , "\n Path: $path" );
133-
fwrite( STDERR , "\n Error: $msg\n" );
134-
}
135-
return false;
130+
fwrite( STDERR , "\n XML load failed on entity file." );
131+
fwrite( STDERR , "\n Path: $path" );
132+
fwrite( STDERR , "\n Error: $msg\n" );
133+
return;
136134
}
137-
return true;
135+
136+
$inline = shouldInline( $dom );
137+
push( $path , $text , $expectedOverride , $inline );
138138
}
139139

140-
function push( string $path , string $contents , bool $expectedOverride )
140+
/**
 * Plain value holder describing one collected entity.
 *
 * Instances are created by push() and consumed by dump(), which reads
 * `inline`, `text` and `path` to decide how the entity is written out.
 */
class EntityData
{
    /** Path of the file the entity text was read from. */
    public string $path;

    /** Entity name. */
    public string $name;

    /** Raw entity text. */
    public string $text;

    /** Whether the entity is emitted inline (true) or as a SYSTEM reference (false). */
    public bool $inline;

    public function __construct( string $path , string $name , string $text , bool $inline )
    {
        $this->path   = $path;
        $this->name   = $name;
        $this->text   = $text;
        $this->inline = $inline;
    }
}
148+
149+
function push( string $path , string $text , bool $expectedOverride , bool $inline )
141150
{
142151
global $entities;
143152
global $expected;
@@ -154,28 +163,56 @@ function push( string $path , string $contents , bool $expectedOverride )
154163
else
155164
$override[$name]++;
156165

157-
$entities[$name] = $contents;
166+
$entity = new EntityData( $path , $name , $text , $inline );
167+
$entities[$name] = $entity;
158168
}
159169

160-
function dump( array $entities )
170+
function dump( string $filename , array $entities )
161171
{
162-
global $generate;
163-
164172
// In PHP 8.4 it may be possible to construct an extended
165173
// DOMEntity class with writable properties. For now,
166174
// creating entities files directly as text.
167175

168-
$file = fopen( $generate , "w" );
176+
$file = fopen( $filename , "w" );
169177
fputs( $file , "\n<!-- DO NOT COPY - Autogenerated by entities.php -->\n\n" );
170178

171-
foreach( $entities as $name => $text )
179+
foreach( $entities as $name => $entity )
172180
{
173-
$text = str_replace( "'" , '&apos;' , $text );
174-
fputs( $file , "<!ENTITY $name '$text'>\n\n");
181+
if ( $entity->inline )
182+
{
183+
$text = str_replace( "'" , '&apos;' , $entity->text );
184+
fputs( $file , "<!ENTITY $name '$text'>\n\n");
185+
}
186+
else
187+
{
188+
fputs( $file , "<!ENTITY $name SYSTEM '{$entity->path}'>\n\n");
189+
}
175190
}
176191
fclose( $file );
177192
}
178193

194+
/**
 * Decides whether an entity is written inline or as a SYSTEM reference.
 *
 * Pure text entities CANNOT be SYSTEMed (or libxml fails), while entities
 * that CONTAIN elements NEED to be SYSTEMed to avoid quotation madness.
 *
 * @param DOMDocument $dom Document loaded from the entity text wrapped in a
 *                         single root element.
 * @return bool true when the query finds no element (emit inline),
 *              false when at least one element is found (emit as SYSTEM).
 */
function shouldInline( DOMDocument $dom ) : bool
{
    // Why libxml/w3c? WHY?

    // With no context node given, DOMXPath::query() evaluates relative to the
    // document's root element, so "child::*" selects the element children of
    // the wrapper root, not the wrapper itself.
    $xpath = new DOMXPath( $dom );
    $elems = $xpath->query( "child::*" );
    return ( $elems->length === 0 );
}
206+
207+
/**
 * Reports whether a node is, or contains at any depth, an element node.
 *
 * NOTE(review): despite the name, a true result means "an element was found",
 * which is the opposite polarity of shouldInline(). No caller is visible in
 * this view, so the existing polarity is kept; confirm before relying on it.
 *
 * @param DOMNode $node Node to inspect (document, fragment, element, text, ...).
 * @return bool true when $node or any of its descendants is an element,
 *              false otherwise.
 */
function shouldInlineRecurse( DOMNode $node ) : bool
{
    if ( $node->nodeType === XML_ELEMENT_NODE )
        return true;

    // Inspect every child, not only the first one, and use a separate loop
    // variable so the parameter is not overwritten while iterating.
    foreach ( $node->childNodes as $child )
        if ( shouldInlineRecurse( $child ) )
            return true;

    // Explicit result for leaves and element-free subtrees: a bool-typed
    // function that falls off the end raises a TypeError.
    return false;
}
215+
179216
function verifyOverrides( bool $outputDetail )
180217
{
181218
global $entities;
@@ -211,4 +248,4 @@ function verifyOverrides( bool $outputDetail )
211248
}
212249

213250
return [$countGenerated, $countExpectedOverriden, $countUnexpectedOverriden];
214-
}
251+
}

0 commit comments

Comments
 (0)