Skip to content

Commit 95742bb

Browse files
committed
Implement unicodedata.east_asian_width()
1 parent cc7aea8 commit 95742bb

File tree

3 files changed

+67
-1
lines changed

3 files changed

+67
-1
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_unicodedata.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,16 @@ def test_lookup(self):
7575
with self.assertRaisesRegex(KeyError, "name too long"):
7676
unicodedata.lookup("a" * 257)
7777

78+
79+
def test_east_asian_width(self):
80+
list = [1, 2, 3]
81+
with self.assertRaisesRegex(TypeError, r"east_asian_width\(\) argument must be a unicode character, not list"):
82+
unicodedata.east_asian_width(list)
83+
84+
multi_character_string = "abc"
85+
with self.assertRaisesRegex(TypeError, r"east_asian_width\(\) argument must be a unicode character, not str"):
86+
unicodedata.east_asian_width(multi_character_string)
87+
88+
empty_string = ""
89+
with self.assertRaisesRegex(TypeError, r"east_asian_width\(\) argument must be a unicode character, not str"):
90+
unicodedata.east_asian_width(empty_string)

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/UnicodeDataModuleBuiltins.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
*/
4141
package com.oracle.graal.python.builtins.modules;
4242

43+
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
4344
import static com.oracle.graal.python.nodes.BuiltinNames.J_UNICODEDATA;
4445
import static com.oracle.graal.python.nodes.BuiltinNames.T_UNICODEDATA;
4546
import static com.oracle.graal.python.runtime.exception.PythonErrorType.KeyError;
@@ -50,6 +51,10 @@
5051
import java.util.List;
5152

5253
import com.oracle.graal.python.builtins.objects.module.PythonModule;
54+
import com.oracle.graal.python.nodes.function.builtins.PythonUnaryBuiltinNode;
55+
import com.oracle.graal.python.nodes.util.CastToTruffleStringNode;
56+
import com.oracle.truffle.api.strings.TruffleString.CodePointAtByteIndexNode;
57+
import com.oracle.truffle.api.strings.TruffleString.CodePointLengthNode;
5358
import com.oracle.truffle.api.strings.TruffleString.FromJavaStringNode;
5459
import com.oracle.truffle.api.strings.TruffleString.ToJavaStringNode;
5560
import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter;
@@ -327,4 +332,52 @@ protected ArgumentClinicProvider getArgumentClinic() {
327332
return UnicodeDataModuleBuiltinsClinicProviders.CategoryNodeClinicProviderGen.INSTANCE;
328333
}
329334
}
335+
336+
// unicodedata.east_asian_width(chr)
337+
@Builtin(name = "east_asian_width", minNumOfPositionalArgs = 1, numOfPositionalOnlyArgs = 1, parameterNames = {"chr"})
338+
@GenerateNodeFactory
339+
public abstract static class EastAsianWidthNode extends PythonUnaryBuiltinNode {
340+
@Specialization
341+
@TruffleBoundary
342+
static TruffleString eastAsianWidth(Object object,
343+
@Bind Node inliningTarget,
344+
@Cached CastToTruffleStringNode castToTruffleStringNode,
345+
@Cached CodePointLengthNode codePointLengthNode,
346+
@Cached CodePointAtByteIndexNode codePointAtByteIndexNode,
347+
@Cached FromJavaStringNode fromJavaStringNode) {
348+
final TruffleString chr;
349+
350+
try {
351+
chr = CastToTruffleStringNode.getUncached().execute(inliningTarget, object);
352+
} catch (CannotCastException e) {
353+
throw PRaiseNode.raiseStatic(inliningTarget, TypeError, ErrorMessages.S_ARG_MUST_BE_S_NOT_P, "east_asian_width()", "a unicode character", object);
354+
}
355+
356+
if (CodePointLengthNode.getUncached().execute(chr, TS_ENCODING) != 1) {
357+
throw PRaiseNode.raiseStatic(inliningTarget, TypeError, ErrorMessages.S_ARG_MUST_BE_S_NOT_P, "east_asian_width()", "a unicode character", object);
358+
}
359+
360+
int codepoint = CodePointAtByteIndexNode.getUncached().execute(chr, 0, TS_ENCODING);
361+
String widthName = getWidthName(codepoint);
362+
return fromJavaStringNode.execute(widthName, TS_ENCODING);
363+
}
364+
365+
@TruffleBoundary
366+
private static String getWidthName(int codepoint) {
367+
int widthNameCode = UCharacter.getIntPropertyValue(codepoint, UProperty.EAST_ASIAN_WIDTH);
368+
String widthName;
369+
370+
switch (widthNameCode) {
371+
case UCharacter.EastAsianWidth.AMBIGUOUS -> widthName = "A";
372+
case UCharacter.EastAsianWidth.FULLWIDTH -> widthName = "F";
373+
case UCharacter.EastAsianWidth.HALFWIDTH -> widthName = "H";
374+
case UCharacter.EastAsianWidth.NARROW -> widthName = "Na";
375+
case UCharacter.EastAsianWidth.NEUTRAL -> widthName = "N";
376+
case UCharacter.EastAsianWidth.WIDE -> widthName = "W";
377+
default -> widthName = ""; // EastAsianWidth.COUNT
378+
}
379+
380+
return widthName;
381+
}
382+
}
330383
}

graalpython/lib-graalpython/unicodedata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,6 @@
4040
__graalpython__.import_current_as_named_module_with_delegate(
4141
module_name="unicodedata",
4242
delegate_name="_cpython_unicodedata",
43-
delegate_attributes=['ucd_3_2_0', 'east_asian_width', 'combining'],
43+
delegate_attributes=['ucd_3_2_0', 'combining'],
4444
wrap_methods=False,
4545
owner_globals=globals())

0 commit comments

Comments
 (0)