4242
4343import static com .oracle .graal .python .nodes .BuiltinNames .J_UNICODEDATA ;
4444import static com .oracle .graal .python .nodes .BuiltinNames .T_UNICODEDATA ;
45+ import static com .oracle .graal .python .runtime .exception .PythonErrorType .KeyError ;
4546import static com .oracle .graal .python .runtime .exception .PythonErrorType .ValueError ;
4647import static com .oracle .graal .python .util .PythonUtils .TS_ENCODING ;
4748import static com .oracle .graal .python .util .PythonUtils .toTruffleStringUncached ;
4849
4950import java .util .List ;
5051
5152import com .oracle .graal .python .builtins .objects .module .PythonModule ;
53+ import com .oracle .truffle .api .strings .TruffleString .FromJavaStringNode ;
54+ import com .oracle .truffle .api .strings .TruffleString .ToJavaStringNode ;
5255import org .graalvm .shadowed .com .ibm .icu .lang .UCharacter ;
5356import org .graalvm .shadowed .com .ibm .icu .lang .UProperty ;
5457import org .graalvm .shadowed .com .ibm .icu .text .Normalizer2 ;
@@ -140,8 +143,8 @@ static TruffleString normalize(@SuppressWarnings("unused") TruffleString form, T
140143 @ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
141144 @ Cached ("getNormalizer(cachedForm)" ) Normalizer2 cachedNormalizer ,
142145 @ SuppressWarnings ("unused" ) @ Cached TruffleString .EqualNode equalNode ,
143- @ Cached TruffleString . ToJavaStringNode toJavaStringNode ,
144- @ Exclusive @ Cached TruffleString . FromJavaStringNode fromJavaStringNode ) {
146+ @ Cached ToJavaStringNode toJavaStringNode ,
147+ @ Exclusive @ Cached FromJavaStringNode fromJavaStringNode ) {
145148 return fromJavaStringNode .execute (normalize (toJavaStringNode .execute (unistr ), cachedNormalizer ), TS_ENCODING );
146149 }
147150
@@ -188,6 +191,72 @@ protected ArgumentClinicProvider getArgumentClinic() {
188191 }
189192 }
190193
194+ // unicodedata.lookup(name)
195+ @ Builtin (name = "lookup" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"name" })
196+ @ ArgumentClinic (name = "name" , conversion = ArgumentClinic .ClinicConversion .TString )
197+ @ GenerateNodeFactory
198+ public abstract static class LookupNode extends PythonUnaryClinicBuiltinNode {
199+
200+ private static final int NAME_MAX_LENGTH = 256 ;
201+
202+ @ Specialization
203+ @ TruffleBoundary
204+ static Object lookup (TruffleString name ,
205+ @ Bind Node inliningTarget ) {
206+ String nameString = ToJavaStringNode .getUncached ().execute (name );
207+ if (nameString .length () > NAME_MAX_LENGTH ) {
208+ throw PRaiseNode .raiseStatic (inliningTarget , KeyError , ErrorMessages .NAME_TOO_LONG );
209+ }
210+
211+ // TODO: support Unicode character named sequences (GR-68227)
212+ // see test/test_ucn.py.UnicodeFunctionsTest.test_named_sequences_full
213+ String character = getCharacterByUnicodeName (nameString );
214+ if (character == null ) {
215+ character = getCharacterByUnicodeNameAlias (nameString );
216+ }
217+ if (character == null ) {
218+ throw PRaiseNode .raiseStatic (inliningTarget , KeyError , ErrorMessages .UNDEFINED_CHARACTER_NAME , name );
219+ }
220+
221+ return FromJavaStringNode .getUncached ().execute (character , TS_ENCODING );
222+ }
223+
224+ @ Override
225+ protected ArgumentClinicProvider getArgumentClinic () {
226+ return UnicodeDataModuleBuiltinsClinicProviders .LookupNodeClinicProviderGen .INSTANCE ;
227+ }
228+
229+ /**
230+ * Finds a Unicode code point by its Unicode name and returns it as a single character
231+ * String. Returns null if name is not found.
232+ */
233+ @ TruffleBoundary
234+ private static String getCharacterByUnicodeName (String unicodeName ) {
235+ int codepoint = UCharacter .getCharFromName (unicodeName );
236+
237+ if (codepoint < 0 ) {
238+ return null ;
239+ }
240+
241+ return UCharacter .toString (codepoint );
242+ }
243+
244+ /**
245+ * Finds a Unicode code point by its Unicode name alias and returns it as a single character
246+ * String. Returns null if name alias is not found.
247+ */
248+ @ TruffleBoundary
249+ private static String getCharacterByUnicodeNameAlias (String unicodeName ) {
250+ int codepoint = UCharacter .getCharFromNameAlias (unicodeName );
251+
252+ if (codepoint < 0 ) {
253+ return null ;
254+ }
255+
256+ return UCharacter .toString (codepoint );
257+ }
258+ }
259+
191260 // unicodedata.name(chr, default)
192261 @ Builtin (name = "name" , minNumOfPositionalArgs = 1 , parameterNames = {"chr" , "default" })
193262 @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
@@ -197,7 +266,7 @@ public abstract static class NameNode extends PythonBinaryClinicBuiltinNode {
197266 @ Specialization
198267 static Object name (int cp , Object defaultValue ,
199268 @ Bind Node inliningTarget ,
200- @ Cached TruffleString . FromJavaStringNode fromJavaStringNode ,
269+ @ Cached FromJavaStringNode fromJavaStringNode ,
201270 @ Cached PRaiseNode raiseNode ) {
202271 String result = getUnicodeName (cp );
203272 if (result == null ) {
@@ -222,7 +291,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
222291 public abstract static class BidirectionalNode extends PythonUnaryClinicBuiltinNode {
223292 @ Specialization
224293 static TruffleString bidirectional (int chr ,
225- @ Cached TruffleString . FromJavaStringNode fromJavaStringNode ) {
294+ @ Cached FromJavaStringNode fromJavaStringNode ) {
226295 return fromJavaStringNode .execute (getBidiClassName (chr ), TS_ENCODING );
227296 }
228297
@@ -244,7 +313,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
244313 public abstract static class CategoryNode extends PythonUnaryClinicBuiltinNode {
245314 @ Specialization
246315 static TruffleString category (int chr ,
247- @ Cached TruffleString . FromJavaStringNode fromJavaStringNode ) {
316+ @ Cached FromJavaStringNode fromJavaStringNode ) {
248317 return fromJavaStringNode .execute (getCategoryName (chr ), TS_ENCODING );
249318 }
250319
0 commit comments