40
40
*/
41
41
package com .oracle .graal .python .builtins .modules ;
42
42
43
- import static com .oracle .graal .python .runtime .exception .PythonErrorType .TypeError ;
44
43
import static com .oracle .graal .python .nodes .BuiltinNames .J_UNICODEDATA ;
45
44
import static com .oracle .graal .python .nodes .BuiltinNames .T_UNICODEDATA ;
46
45
import static com .oracle .graal .python .runtime .exception .PythonErrorType .KeyError ;
50
49
51
50
import java .util .List ;
52
51
53
- import com .oracle .graal .python .builtins .objects .module .PythonModule ;
54
- import com .oracle .graal .python .nodes .function .builtins .PythonUnaryBuiltinNode ;
55
- import com .oracle .graal .python .nodes .util .CannotCastException ;
56
- import com .oracle .graal .python .nodes .util .CastToTruffleStringNode ;
57
- import com .oracle .truffle .api .strings .TruffleString .CodePointAtByteIndexNode ;
58
- import com .oracle .truffle .api .strings .TruffleString .CodePointLengthNode ;
59
- import com .oracle .truffle .api .strings .TruffleString .FromJavaStringNode ;
60
- import com .oracle .truffle .api .strings .TruffleString .ToJavaStringNode ;
61
52
import org .graalvm .shadowed .com .ibm .icu .lang .UCharacter ;
53
+ import org .graalvm .shadowed .com .ibm .icu .lang .UCharacter .DecompositionType ;
62
54
import org .graalvm .shadowed .com .ibm .icu .lang .UProperty ;
63
55
import org .graalvm .shadowed .com .ibm .icu .text .Normalizer2 ;
64
56
import org .graalvm .shadowed .com .ibm .icu .util .VersionInfo ;
69
61
import com .oracle .graal .python .builtins .Python3Core ;
70
62
import com .oracle .graal .python .builtins .PythonBuiltins ;
71
63
import com .oracle .graal .python .builtins .objects .PNone ;
64
+ import com .oracle .graal .python .builtins .objects .module .PythonModule ;
72
65
import com .oracle .graal .python .nodes .ErrorMessages ;
73
66
import com .oracle .graal .python .nodes .PRaiseNode ;
74
67
import com .oracle .graal .python .nodes .function .PythonBuiltinBaseNode ;
85
78
import com .oracle .truffle .api .dsl .Specialization ;
86
79
import com .oracle .truffle .api .nodes .Node ;
87
80
import com .oracle .truffle .api .strings .TruffleString ;
81
+ import com .oracle .truffle .api .strings .TruffleString .FromJavaStringNode ;
82
+ import com .oracle .truffle .api .strings .TruffleString .ToJavaStringNode ;
88
83
89
84
@ CoreFunctions (defineModule = J_UNICODEDATA , isEager = true )
90
85
public final class UnicodeDataModuleBuiltins extends PythonBuiltins {
@@ -143,7 +138,7 @@ static Normalizer2 getNormalizer(TruffleString form) {
143
138
@ ArgumentClinic (name = "unistr" , conversion = ArgumentClinic .ClinicConversion .TString )
144
139
@ GenerateNodeFactory
145
140
@ ImportStatic (UnicodeDataModuleBuiltins .class )
146
- public abstract static class NormalizeNode extends PythonBinaryClinicBuiltinNode {
141
+ abstract static class NormalizeNode extends PythonBinaryClinicBuiltinNode {
147
142
@ Specialization (guards = {"cachedNormalizer != null" , "stringEquals(form, cachedForm, equalNode)" }, limit = "NORMALIZER_FORM_COUNT" )
148
143
static TruffleString normalize (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
149
144
@ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
@@ -155,8 +150,9 @@ static TruffleString normalize(@SuppressWarnings("unused") TruffleString form, T
155
150
}
156
151
157
152
@ Specialization (guards = "getNormalizer(form) == null" )
158
- TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ) {
159
- throw PRaiseNode .raiseStatic (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
153
+ static TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ,
154
+ @ Bind Node inliningTarget ) {
155
+ throw PRaiseNode .raiseStatic (inliningTarget , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
160
156
}
161
157
162
158
@ TruffleBoundary
@@ -176,19 +172,20 @@ protected ArgumentClinicProvider getArgumentClinic() {
176
172
@ ArgumentClinic (name = "unistr" , conversion = ArgumentClinic .ClinicConversion .TString )
177
173
@ GenerateNodeFactory
178
174
@ ImportStatic (UnicodeDataModuleBuiltins .class )
179
- public abstract static class IsNormalizedNode extends PythonBinaryClinicBuiltinNode {
175
+ abstract static class IsNormalizedNode extends PythonBinaryClinicBuiltinNode {
180
176
@ Specialization (guards = {"cachedNormalizer != null" , "stringEquals(form, cachedForm, equalNode)" }, limit = "NORMALIZER_FORM_COUNT" )
181
177
@ TruffleBoundary
182
- boolean isNormalized (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
178
+ static boolean isNormalized (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
183
179
@ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
184
180
@ Cached ("getNormalizer(cachedForm)" ) Normalizer2 cachedNormalizer ,
185
181
@ SuppressWarnings ("unused" ) @ Cached TruffleString .EqualNode equalNode ) {
186
182
return cachedNormalizer .isNormalized (unistr .toJavaStringUncached ());
187
183
}
188
184
189
185
@ Specialization (guards = "getNormalizer(form) == null" )
190
- TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ) {
191
- throw PRaiseNode .raiseStatic (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
186
+ static TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ,
187
+ @ Bind Node inliningTarget ) {
188
+ throw PRaiseNode .raiseStatic (inliningTarget , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
192
189
}
193
190
194
191
@ Override
@@ -201,7 +198,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
201
198
@ Builtin (name = "lookup" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"name" })
202
199
@ ArgumentClinic (name = "name" , conversion = ArgumentClinic .ClinicConversion .TString )
203
200
@ GenerateNodeFactory
204
- public abstract static class LookupNode extends PythonUnaryClinicBuiltinNode {
201
+ abstract static class LookupNode extends PythonUnaryClinicBuiltinNode {
205
202
206
203
private static final int NAME_MAX_LENGTH = 256 ;
207
204
@@ -267,7 +264,7 @@ private static String getCharacterByUnicodeNameAlias(String unicodeName) {
267
264
@ Builtin (name = "name" , minNumOfPositionalArgs = 1 , parameterNames = {"chr" , "default" })
268
265
@ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
269
266
@ GenerateNodeFactory
270
- public abstract static class NameNode extends PythonBinaryClinicBuiltinNode {
267
+ abstract static class NameNode extends PythonBinaryClinicBuiltinNode {
271
268
272
269
@ Specialization
273
270
static Object name (int cp , Object defaultValue ,
@@ -294,7 +291,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
294
291
@ Builtin (name = "bidirectional" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
295
292
@ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
296
293
@ GenerateNodeFactory
297
- public abstract static class BidirectionalNode extends PythonUnaryClinicBuiltinNode {
294
+ abstract static class BidirectionalNode extends PythonUnaryClinicBuiltinNode {
298
295
@ Specialization
299
296
static TruffleString bidirectional (int chr ,
300
297
@ Cached FromJavaStringNode fromJavaStringNode ) {
@@ -316,7 +313,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
316
313
@ Builtin (name = "category" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
317
314
@ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
318
315
@ GenerateNodeFactory
319
- public abstract static class CategoryNode extends PythonUnaryClinicBuiltinNode {
316
+ abstract static class CategoryNode extends PythonUnaryClinicBuiltinNode {
320
317
@ Specialization
321
318
static TruffleString category (int chr ,
322
319
@ Cached FromJavaStringNode fromJavaStringNode ) {
@@ -336,57 +333,90 @@ protected ArgumentClinicProvider getArgumentClinic() {
336
333
337
334
// unicodedata.combining(chr)
338
335
@ Builtin (name = "combining" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
336
+ @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
339
337
@ GenerateNodeFactory
340
- public abstract static class CombiningNode extends PythonUnaryBuiltinNode {
338
+ abstract static class CombiningNode extends PythonUnaryClinicBuiltinNode {
341
339
342
340
@ Specialization
343
341
@ TruffleBoundary
344
- static Object combining (Object object ,
345
- @ Bind Node inliningTarget ) {
346
- final TruffleString chr ;
342
+ static Object combining (int codepoint ) {
343
+ return UCharacter . getCombiningClass ( codepoint );
344
+ }
347
345
348
- try {
349
- chr = CastToTruffleStringNode . getUncached (). execute ( inliningTarget , object );
350
- } catch ( CannotCastException e ) {
351
- throw PRaiseNode . raiseStatic ( inliningTarget , TypeError , ErrorMessages . S_ARG_MUST_BE_S_NOT_P , "combining()" , "a unicode character" , object );
352
- }
346
+ @ Override
347
+ protected ArgumentClinicProvider getArgumentClinic () {
348
+ return UnicodeDataModuleBuiltinsClinicProviders . CombiningNodeClinicProviderGen . INSTANCE ;
349
+ }
350
+ }
353
351
354
- if (CodePointLengthNode .getUncached ().execute (chr , TS_ENCODING ) != 1 ) {
355
- throw PRaiseNode .raiseStatic (inliningTarget , TypeError , ErrorMessages .S_ARG_MUST_BE_S_NOT_P , "combining()" , "a unicode character" , object );
352
+ // unicodedata.decomposition(chr)
353
+ @ Builtin (name = "decomposition" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
354
+ @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
355
+ @ GenerateNodeFactory
356
+ abstract static class DecompositionNode extends PythonUnaryClinicBuiltinNode {
357
+ @ Specialization
358
+ @ TruffleBoundary
359
+ static TruffleString decomposition (int codepoint ) {
360
+ int type = UCharacter .getIntPropertyValue (codepoint , UProperty .DECOMPOSITION_TYPE );
361
+ String prefix = getDecompositionPrefix (type );
362
+ String decomposition = Normalizer2 .getNFKDInstance ().getDecomposition (codepoint );
363
+
364
+ StringBuilder sb = new StringBuilder ();
365
+ if (prefix != null ) {
366
+ sb .append (prefix );
367
+ }
368
+ if (decomposition != null ) {
369
+ int cp ;
370
+ for (int i = 0 ; i < decomposition .length (); i += Character .charCount (cp )) {
371
+ if (!sb .isEmpty ()) {
372
+ sb .append (' ' );
373
+ }
374
+ cp = decomposition .codePointAt (i );
375
+ sb .append (String .format ("%04x" , cp ));
376
+ }
356
377
}
357
378
358
- int codepoint = CodePointAtByteIndexNode .getUncached ().execute (chr , 0 , TS_ENCODING );
359
- return UCharacter .getCombiningClass (codepoint );
379
+ return FromJavaStringNode .getUncached ().execute (sb .toString (), TS_ENCODING );
380
+ }
381
+
382
+ private static String getDecompositionPrefix (int type ) {
383
+ return switch (type ) {
384
+ case DecompositionType .NOBREAK -> "<noBreak>" ;
385
+ case DecompositionType .COMPAT -> "<compat>" ;
386
+ case DecompositionType .SUPER -> "<super>" ;
387
+ case DecompositionType .FRACTION -> "<fraction>" ;
388
+ case DecompositionType .SUB -> "<sub>" ;
389
+ case DecompositionType .FONT -> "<font>" ;
390
+ case DecompositionType .CIRCLE -> "<circle>" ;
391
+ case DecompositionType .WIDE -> "<wide>" ;
392
+ case DecompositionType .VERTICAL -> "<vertical>" ;
393
+ case DecompositionType .SQUARE -> "<square>" ;
394
+ case DecompositionType .ISOLATED -> "<isolated>" ;
395
+ case DecompositionType .FINAL -> "<final>" ;
396
+ case DecompositionType .INITIAL -> "<initial>" ;
397
+ case DecompositionType .MEDIAL -> "<medial>" ;
398
+ case DecompositionType .SMALL -> "<small>" ;
399
+ case DecompositionType .NARROW -> "<narrow>" ;
400
+ default -> null ;
401
+ };
402
+ }
403
+
404
+ @ Override
405
+ protected ArgumentClinicProvider getArgumentClinic () {
406
+ return UnicodeDataModuleBuiltinsClinicProviders .DecompositionNodeClinicProviderGen .INSTANCE ;
360
407
}
361
408
}
362
409
363
410
// unicodedata.east_asian_width(chr)
364
411
@ Builtin (name = "east_asian_width" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
412
+ @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
365
413
@ GenerateNodeFactory
366
- public abstract static class EastAsianWidthNode extends PythonUnaryBuiltinNode {
414
+ abstract static class EastAsianWidthNode extends PythonUnaryClinicBuiltinNode {
367
415
@ Specialization
368
416
@ TruffleBoundary
369
- static TruffleString eastAsianWidth (Object object ,
370
- @ Bind Node inliningTarget ,
371
- @ Cached CastToTruffleStringNode castToTruffleStringNode ,
372
- @ Cached CodePointLengthNode codePointLengthNode ,
373
- @ Cached CodePointAtByteIndexNode codePointAtByteIndexNode ,
374
- @ Cached FromJavaStringNode fromJavaStringNode ) {
375
- final TruffleString chr ;
376
-
377
- try {
378
- chr = CastToTruffleStringNode .getUncached ().execute (inliningTarget , object );
379
- } catch (CannotCastException e ) {
380
- throw PRaiseNode .raiseStatic (inliningTarget , TypeError , ErrorMessages .S_ARG_MUST_BE_S_NOT_P , "east_asian_width()" , "a unicode character" , object );
381
- }
382
-
383
- if (CodePointLengthNode .getUncached ().execute (chr , TS_ENCODING ) != 1 ) {
384
- throw PRaiseNode .raiseStatic (inliningTarget , TypeError , ErrorMessages .S_ARG_MUST_BE_S_NOT_P , "east_asian_width()" , "a unicode character" , object );
385
- }
386
-
387
- int codepoint = CodePointAtByteIndexNode .getUncached ().execute (chr , 0 , TS_ENCODING );
417
+ static TruffleString eastAsianWidth (int codepoint ) {
388
418
String widthName = getWidthName (codepoint );
389
- return fromJavaStringNode .execute (widthName , TS_ENCODING );
419
+ return FromJavaStringNode . getUncached () .execute (widthName , TS_ENCODING );
390
420
}
391
421
392
422
@ TruffleBoundary
@@ -406,5 +436,10 @@ private static String getWidthName(int codepoint) {
406
436
407
437
return widthName ;
408
438
}
439
+
440
+ @ Override
441
+ protected ArgumentClinicProvider getArgumentClinic () {
442
+ return UnicodeDataModuleBuiltinsClinicProviders .EastAsianWidthNodeClinicProviderGen .INSTANCE ;
443
+ }
409
444
}
410
445
}
0 commit comments