1
- <?php
2
- /*
1
+ <?php /*
3
2
+----------------------------------------------------------------------+
4
3
| Copyright (c) 1997-2023 The PHP Group |
5
4
+----------------------------------------------------------------------+
16
15
| Description: Collect individual entities into an entities.ent file. |
17
16
+----------------------------------------------------------------------+
18
17
19
- # Conventions
18
+ # Mental model, or things that I would have liked to know 20 years prior
20
19
21
- * `.dnt`: Simple text, "do not translate" file;
22
- * `.txt`: Simple text, translatable, untracked file;
23
- * `.xml`: Full XML, translatable, tracked file.
20
+ XML Entity processing has more in common with DOMDocumentFragment than
21
+ DOMElement. In other words, simple text and multi-root XML entities
22
+ are valid <!ENTITY> contents, whereas they are not valid XML documents.
23
+
24
+ Also, namespaces do not automatically "cross" between a parent
25
+ document and its includes, even if they are included in the same
26
+ file, as local textual entities. They are, for all intents and purposes,
27
+ separate documents, with separate namespaces, and are *expected* to have
28
+ different *default* namespaces.
29
+
30
+ So each one of, possibly multiple, "root" XML elements inside a
31
+ fragment needs to be annotated with a default namespace, even if the
32
+ "root" element occurs surrounded by text. For example:
33
+
34
+ - "text<tag>text</tag>", needs one namespace, or it is invalid, and;
35
+ - "<tag></tag><tag></tag>", needs TWO namespaces, or it is also invalid.
36
+
37
+ # Individual tracked entities, or `.xml` files at `entities/`
38
+
39
+ As explained above, the individual entity contents are not really
40
+ valid XML *documents*, they are only at most valid XML *fragments*.
41
+
42
+ Yet, individual entities are stored in entities/ as .xml files, for
43
+ two reasons: first, text editors in general can highlight XML syntax,
44
+ and second, this allows normal revision tracking on them, without
45
+ requiring weird changes on `revcheck.php`.
46
+
47
+ # Small entities, group tracked (future)
48
+
49
+ For very small textual entities, down to simple text words, that may
50
+ never change, having tracking for each instance is overkill.
51
+
52
+ It's planned to have new `manual.ent` and `website.ent` files
53
+ in each doc language, that internally are valid XML documents and
54
+ also replicate namespace declarations used on manual.xml.in, so
55
+ it will be possible to migrate the current <!ENTITY> infrastructure
56
+ to something that is more consumable for XML tooling (and will
57
+ avoid most if not all of the XML namespacing hell).
58
+
59
+ These small files are to be split into entities/ as individual
60
+ .tmp text files, for normal inclusion in the manual.
24
61
25
- Each entitiesDir is read in order, overwriting previous defined
26
- entities with new ones (this is inverse of XML processing, where
27
- overwriting entities are ignored).
28
62
*/
29
63
64
+ ini_set ( 'display_errors ' , 1 );
65
+ ini_set ( 'display_startup_errors ' , 1 );
66
+ error_reporting ( E_ALL );
67
+
30
68
if ( count ( $ argv ) < 2 || in_array ( '--help ' , $ argv ) || in_array ( '-h ' , $ argv ) )
31
69
{
32
70
fwrite ( STDERR , "\nUsage: {$ argv [0 ]} entitiesDir [entitiesDir] \n\n" );
33
71
return ;
34
72
}
35
73
36
74
$ filename = __DIR__ . "/../.entities.ent " ; // sibling of .manual.xml
37
- touch ( $ filename ); // empty file, at minimum, and
38
- $ filename = realpath ( $ filename ); // realpath() fails if file not exists .
75
+ touch ( $ filename ); // empty file at minimum, and because
76
+ $ filename = realpath ( $ filename ); // realpath() fails if file does not exist .
39
77
40
- $ entities = []; // all entitites , already overriden
41
- $ expected = []; // entities that are expected to be oversidem (translatins)
42
- $ override = []; // overrides stattics
78
+ $ entities = []; // all entities , already replaced
79
+ $ expected = []; // entities that are expected to be replaced/translated
80
+ $ foundcnt = []; // tracks how many times entity name was found
43
81
44
82
$ langs = [];
45
83
$ detail = false ;
50
88
else
51
89
$ langs [] = $ argv [$ idx ];
52
90
53
- if ( $ detail )
54
- print "Creating file $ filename in verbose detail mode... \n" ;
55
- else
91
+ if ( ! $ detail )
56
92
print "Creating file $ filename... " ;
57
93
58
- for ( $ run = 0 ; $ run < count ( $ langs ) ; $ run ++ )
59
- parseDir ( $ langs [$ run ] , $ run > 0 );
94
+ for ( $ run = 0 ; $ run < count ( $ langs ) ; $ run ++ )
95
+ parseDir ( $ langs [$ run ] , ( count ( $ langs ) && $ run == 0 ) );
60
96
61
97
dump ( $ filename , $ entities );
98
+ [$ all , $ unt , $ over ] = verifyReplaced ( $ detail );
62
99
63
- if ( $ detail )
64
- {
65
- print "Done. \n" ;
66
- }
67
- else
100
+ if ( ! $ detail )
68
101
{
69
102
echo " done " ;
70
- [$ all , $ unt , $ over ] = verifyOverrides ( $ detail );
71
103
if ( $ unt + $ over > 0 )
72
- echo ": $ all entities, $ unt untranslated, $ over orerriden " ;
73
- echo ". \n" ;
104
+ echo ": $ all entities, $ unt untranslated, $ over overwrites. " ;
105
+ echo "\n" ;
74
106
}
75
107
exit ;
76
108
77
-
78
-
79
- function parseDir ( string $ dir , bool $ expectedOverride )
109
+ function parseDir ( string $ dir , bool $ expectedReplaced )
80
110
{
81
111
if ( ! is_dir ( $ dir ) )
82
- return ; // for now. When implanted in all languages: exit( "Not a directory: $dir\n" );
112
+ exit ( "Not a directory: $ dir \n" );
83
113
114
+ $ count = 0 ;
84
115
$ files = scandir ( $ dir );
85
116
86
117
foreach ( $ files as $ file )
@@ -94,18 +125,23 @@ function parseDir( string $dir , bool $expectedOverride )
94
125
continue ;
95
126
96
127
$ text = file_get_contents ( $ path );
97
- validateStore ( $ path , $ text , $ expectedOverride );
128
+ validateStore ( $ path , $ text , $ expectedReplaced );
129
+ $ count ++;
98
130
}
131
+
132
+ global $ detail ;
133
+ if ( $ detail )
134
+ echo "$ count files on $ dir \n" ;
99
135
}
100
136
101
- function validateStore ( string $ path , string $ text , bool $ expectedOverride )
137
+ function validateStore ( string $ path , string $ text , bool $ expectedReplaced )
102
138
{
103
139
$ trim = trim ( $ text );
104
140
if ( strlen ( $ trim ) == 0 )
105
141
{
106
- // Yes, there is empty entities, and they are valid entity , but not valid XML.
142
+ // Yes, there are empty entities, and they are valid entities , but not valid XML.
107
143
// see: en/language-snippets.ent mongodb.note.queryable-encryption-preview
108
- push ( $ path , $ text , $ expectedOverride , true );
144
+ push ( $ path , $ text , $ expectedReplaced , true );
109
145
return ;
110
146
}
111
147
@@ -133,110 +169,101 @@ function validateStore( string $path , string $text , bool $expectedOverride )
133
169
return ;
134
170
}
135
171
136
- $ inline = shouldInline ( $ dom );
137
- push ( $ path , $ text , $ expectedOverride , $ inline );
172
+ push ( $ path , $ text , $ expectedReplaced );
138
173
}
139
174
140
175
class EntityData
141
176
{
142
177
public function __construct (
143
178
public string $ path ,
144
179
public string $ name ,
145
- public string $ text ,
146
- public bool $ inline ) {}
180
+ public string $ text ) {}
147
181
}
148
182
149
- function push ( string $ path , string $ text , bool $ expectedOverride , bool $ inline )
183
+ function push ( string $ path , string $ text , bool $ expectedReplaced )
150
184
{
185
+
151
186
global $ entities ;
152
187
global $ expected ;
153
- global $ override ;
188
+ global $ foundcnt ;
154
189
155
190
$ info = pathinfo ( $ path );
156
191
$ name = $ info ["filename " ];
157
192
158
- if ( $ expectedOverride )
193
+ if ( $ expectedReplaced )
159
194
$ expected [] = $ name ;
160
195
161
- if ( ! isset ( $ override [$ name ] ) )
162
- $ override [$ name ] = 0 ;
196
+ if ( ! isset ( $ foundcnt [$ name ] ) )
197
+ $ foundcnt [$ name ] = 1 ;
163
198
else
164
- $ override [$ name ]++;
199
+ $ foundcnt [$ name ]++;
165
200
166
- $ entity = new EntityData ( $ path , $ name , $ text , $ inline );
201
+ $ entity = new EntityData ( $ path , $ name , $ text );
167
202
$ entities [$ name ] = $ entity ;
168
203
}
169
204
170
205
function dump ( string $ filename , array $ entities )
171
206
{
172
207
// In PHP 8.4 it may be possible to construct an extended
173
208
// DOMEntity class with writable properties. For now,
174
- // creating entities files directly as text .
209
+ // creating entities files directly by hand .
175
210
176
211
$ file = fopen ( $ filename , "w " );
177
212
fputs ( $ file , "\n<!-- DO NOT COPY - Autogenerated by entities.php --> \n\n" );
178
213
179
214
foreach ( $ entities as $ name => $ entity )
180
215
{
181
- if ( $ entity ->inline )
182
- {
183
- $ text = str_replace ( "' " , '' ' , $ entity ->text );
184
- fputs ( $ file , "<!ENTITY $ name ' $ text'> \n\n" );
185
- }
186
- else
187
- {
188
- fputs ( $ file , "<!ENTITY $ name SYSTEM ' {$ entity ->path }'> \n\n" );
189
- }
190
- }
191
- fclose ( $ file );
192
- }
216
+ $ text = $ entity ->text ;
193
217
194
- function shouldInline ( DOMDocument $ dom ) : bool
195
- {
196
- // Pure text entities CANNOT be SYSTEMed (or libxml fails).
197
- // But entities that CONTAINS elements need to be SYSTEMed
198
- // to avoid quotation madness.
218
+ $ quote = "" ;
219
+ $ posSingle = strpos ( $ text , "' " );
220
+ $ posDouble = strpos ( $ text , '" ' );
221
+
222
+ if ( $ posSingle === false )
223
+ $ quote = "' " ;
224
+ if ( $ posDouble === false )
225
+ $ quote = '" ' ;
199
226
200
- // Why libxml/w3c? WHY?
227
+ // If the text contains mixed quoting, keep it
228
+ // as an external file to avoid (re)quotation hell.
229
+
230
+ if ( $ quote == "" )
231
+ fputs ( $ file , "<!ENTITY $ name SYSTEM ' {$ entity ->path }'> \n\n" );
232
+ else
233
+ fputs ( $ file , "<!ENTITY $ name {$ quote }{$ text }{$ quote }> \n\n" );
234
+ }
201
235
202
- $ xpath = new DomXPath ( $ dom );
203
- $ elems = $ xpath ->query ( "child::* " );
204
- return ( $ elems ->length == 0 );
236
+ fclose ( $ file );
205
237
}
206
238
207
- function verifyOverrides ( bool $ outputDetail )
239
+ function verifyReplaced ( bool $ outputDetail )
208
240
{
209
241
global $ entities ;
210
242
global $ expected ;
211
- global $ override ;
243
+ global $ foundcnt ;
212
244
213
- $ countGenerated = count ( $ entities );
214
- $ countExpectedOverriden = 0 ;
215
- $ countUnexpectedOverriden = 0 ;
245
+ $ countUntranslated = 0 ;
246
+ $ countConstantChanged = 0 ;
216
247
217
248
foreach ( $ entities as $ name => $ text )
218
249
{
219
- $ times = $ override [$ name ];
250
+ $ replaced = $ foundcnt [$ name ] - 1 ;
251
+ $ expectedReplaced = in_array ( $ name , $ expected );
220
252
221
- if ( isset ( $ expected [ $ name ] ) )
253
+ if ( $ expectedReplaced && $ replaced != 1 )
222
254
{
223
- if ( $ times != 1 )
224
- {
225
- $ countExpectedOverriden ++;
226
- if ( $ outputDetail )
227
- print "Expected override entity $ name overriden $ times times. \n" ;
228
- }
255
+ $ countUntranslated ++;
256
+ if ( $ outputDetail )
257
+ print "Expected translated, replaced $ replaced times: \t$ name \n" ;
229
258
}
230
- else
259
+
260
+ elseif ( ! $ expectedReplaced && $ replaced != 0 )
231
261
{
232
- if ( $ times != 0 )
233
- {
234
- $ countUnexpectedOverriden ++;
235
- if ( $ outputDetail )
236
- print "Unexpected override entity $ name overriden $ times times. \n" ;
237
- }
262
+ $ countConstantChanged ++;
263
+ if ( $ outputDetail )
264
+ print "Unexpected replaced, replaced $ replaced times: \t$ name \n" ;
238
265
}
239
266
}
240
267
241
- return [$ countGenerated , $ countExpectedOverriden , $ countUnexpectedOverriden ];
268
+ return [count ( $ entities ) , $ countUntranslated , $ countConstantChanged ];
242
269
}
0 commit comments