Skip to content

Commit 225e3bf

Browse files
committed
[GR-23268] Fixes for test_unicode, part 3
PullRequest: graalpython/1265
2 parents 55b5ccb + 0353e88 commit 225e3bf

File tree

13 files changed

+335
-169
lines changed

13 files changed

+335
-169
lines changed

graalpython/com.oracle.graal.python.annotations/src/com/oracle/graal/python/annotations/ArgumentClinic.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,5 +127,10 @@ enum ClinicConversion {
127127
* and {@link #useDefaultForNone()}.
128128
*/
129129
Index,
130+
/**
131+
* Corresponds to CPython's {@code int(accept={str})} convertor. Supports
132+
* {@link #defaultValue()} and {@link #useDefaultForNone()}.
133+
*/
134+
CodePoint,
130135
}
131136
}

graalpython/com.oracle.graal.python.processor/src/com/oracle/graal/python/processor/ArgumentClinicModel.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ public static String getCodeSnippet(ArgumentClinic annotation, BuiltinAnnotation
7171
}
7272
case Int:
7373
return format("JavaIntConversionNodeGen.create(%s, %s)", annotation.defaultValue(), annotation.useDefaultForNone());
74+
case CodePoint:
75+
return format("CodePointConversionNodeGen.create(\"%s\", %s, %s)", builtin.name, annotation.defaultValue(), annotation.useDefaultForNone());
7476
case Index:
7577
return format("IndexConversionNodeGen.create(%s, %s)", annotation.defaultValue(), annotation.useDefaultForNone());
7678
case None:
@@ -103,6 +105,8 @@ private static String getConvertorImport(ArgumentClinic annotation) {
103105
return "JavaIntConversionNodeGen";
104106
case Index:
105107
return "IndexConversionNodeGen";
108+
case CodePoint:
109+
return "CodePointConversionNodeGen";
106110
case None:
107111
return "DefaultValueNode";
108112
default:
@@ -115,6 +119,7 @@ public static PrimitiveType[] getAcceptedPrimitiveTypes(ArgumentClinic annotatio
115119
case Boolean:
116120
return new PrimitiveType[]{PrimitiveType.Boolean};
117121
case String:
122+
case CodePoint:
118123
return new PrimitiveType[0];
119124
case Int:
120125
case Index:

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_unicode.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
*graalpython.lib-python.3.test.test_unicode.CAPITest.test_copycharacters
55
*graalpython.lib-python.3.test.test_unicode.CAPITest.test_encode_decimal
66
*graalpython.lib-python.3.test.test_unicode.CAPITest.test_findchar
7+
*graalpython.lib-python.3.test.test_unicode.CAPITest.test_from_format
78
*graalpython.lib-python.3.test.test_unicode.CAPITest.test_pep393_utf8_caching_bug
89
*graalpython.lib-python.3.test.test_unicode.CAPITest.test_transform_decimal
910
*graalpython.lib-python.3.test.test_unicode.StringModuleTest.test_formatter_field_name_split
1011
*graalpython.lib-python.3.test.test_unicode.StringModuleTest.test_formatter_parser
1112
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test___contains__
13+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_additional_rsplit
1214
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_additional_split
1315
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_ascii
1416
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_bytes_comparison
@@ -20,6 +22,7 @@
2022
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_codecs
2123
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_codecs_idna
2224
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_codecs_utf7
25+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_compare
2326
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_comparison
2427
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_concatenation
2528
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_constructor
@@ -28,21 +31,28 @@
2831
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_contains
2932
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_count
3033
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_endswith
34+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_expandtabs
3135
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_expandtabs_optimization
3236
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_expandtabs_overflows_gracefully
3337
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_extended_getslice
3438
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_find_etc_raise_correct_error_messages
3539
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_fixtype
3640
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_floatformatting
41+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format
3742
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_auto_numbering
3843
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_float
44+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_huge_item_number
3945
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_huge_precision
4046
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_huge_width
47+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_map
4148
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_format_subclass
4249
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_formatting_c_limits
50+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_formatting_huge_precision
4351
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_formatting_huge_precision_c_limits
52+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_formatting_huge_width
4453
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_formatting_with_enum
4554
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_free_after_iterating
55+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_getnewargs
4656
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_hash
4757
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_inplace_rewrites
4858
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_invalid_cb_for_2bytes_seq
@@ -58,6 +68,7 @@
5868
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_isnumeric
5969
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_isprintable
6070
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_isspace
71+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_isspace_invariant
6172
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_issue18183
6273
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_issue28598_strsubclass_rhs
6374
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_issue8271

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/UnicodeDataModuleBuiltins.java

Lines changed: 65 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -40,39 +40,37 @@
4040
*/
4141
package com.oracle.graal.python.builtins.modules;
4242

43-
import com.ibm.icu.lang.UCharacter;
44-
45-
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
4643
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
4744

4845
import java.text.Normalizer;
4946
import java.util.List;
5047

48+
import com.ibm.icu.lang.UCharacter;
49+
import com.ibm.icu.lang.UProperty;
50+
import com.oracle.graal.python.annotations.ArgumentClinic;
5151
import com.oracle.graal.python.builtins.Builtin;
5252
import com.oracle.graal.python.builtins.CoreFunctions;
5353
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
5454
import com.oracle.graal.python.builtins.PythonBuiltins;
5555
import com.oracle.graal.python.builtins.objects.PNone;
5656
import com.oracle.graal.python.builtins.objects.object.PythonObject;
57-
import com.oracle.graal.python.builtins.objects.str.PString;
5857
import com.oracle.graal.python.builtins.objects.type.PythonBuiltinClass;
5958
import com.oracle.graal.python.nodes.ErrorMessages;
60-
import com.oracle.graal.python.nodes.function.PythonBuiltinNode;
61-
import com.oracle.graal.python.nodes.util.CannotCastException;
62-
import com.oracle.graal.python.nodes.util.CastToJavaStringNode;
59+
import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
60+
import com.oracle.graal.python.nodes.function.builtins.PythonBinaryClinicBuiltinNode;
61+
import com.oracle.graal.python.nodes.function.builtins.PythonUnaryClinicBuiltinNode;
62+
import com.oracle.graal.python.nodes.function.builtins.clinic.ArgumentClinicProvider;
6363
import com.oracle.graal.python.runtime.PythonCore;
64-
import com.oracle.truffle.api.CompilerDirectives;
6564
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
6665
import com.oracle.truffle.api.dsl.Cached;
67-
import com.oracle.truffle.api.dsl.Fallback;
6866
import com.oracle.truffle.api.dsl.GenerateNodeFactory;
6967
import com.oracle.truffle.api.dsl.NodeFactory;
7068
import com.oracle.truffle.api.dsl.Specialization;
7169

7270
@CoreFunctions(defineModule = "unicodedata")
7371
public class UnicodeDataModuleBuiltins extends PythonBuiltins {
7472
@Override
75-
protected List<? extends NodeFactory<? extends PythonBuiltinNode>> getNodeFactories() {
73+
protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFactories() {
7674
return UnicodeDataModuleBuiltinsFactory.getFactories();
7775
}
7876

@@ -175,9 +173,11 @@ public void initialize(PythonCore core) {
175173
}
176174

177175
// unicodedata.normalize(form, unistr)
178-
@Builtin(name = "normalize", minNumOfPositionalArgs = 2)
176+
@Builtin(name = "normalize", minNumOfPositionalArgs = 2, parameterNames = {"form", "unistr"})
177+
@ArgumentClinic(name = "form", conversion = ArgumentClinic.ClinicConversion.String)
178+
@ArgumentClinic(name = "unistr", conversion = ArgumentClinic.ClinicConversion.String)
179179
@GenerateNodeFactory
180-
public abstract static class NormalizeNode extends PythonBuiltinNode {
180+
public abstract static class NormalizeNode extends PythonBinaryClinicBuiltinNode {
181181
@TruffleBoundary
182182
protected Normalizer.Form getForm(String form) {
183183
try {
@@ -198,25 +198,18 @@ public String normalize(@SuppressWarnings("unused") String form, String unistr,
198198
return Normalizer.normalize(unistr, cachedNormForm);
199199
}
200200

201-
@Specialization(guards = {"form.equals(cachedForm)"}, limit = "4")
202-
public String normalize(String form, PString unistr,
203-
@Cached("form") String cachedForm,
204-
@Cached CastToJavaStringNode castToJavaStringNode,
205-
@Cached("getForm(cachedForm)") Normalizer.Form cachedNormForm) {
206-
try {
207-
return normalize(form, castToJavaStringNode.execute(unistr), cachedForm, cachedNormForm);
208-
} catch (CannotCastException e) {
209-
CompilerDirectives.transferToInterpreterAndInvalidate();
210-
throw new IllegalStateException("should not be reached");
211-
}
201+
@Override
202+
protected ArgumentClinicProvider getArgumentClinic() {
203+
return UnicodeDataModuleBuiltinsClinicProviders.NormalizeNodeClinicProviderGen.INSTANCE;
212204
}
213-
214205
}
215206

216207
// unicodedata.is_normalized(form, unistr)
217-
@Builtin(name = "is_normalized", minNumOfPositionalArgs = 2)
208+
@Builtin(name = "is_normalized", minNumOfPositionalArgs = 2, parameterNames = {"form", "unistr"})
209+
@ArgumentClinic(name = "form", conversion = ArgumentClinic.ClinicConversion.String)
210+
@ArgumentClinic(name = "unistr", conversion = ArgumentClinic.ClinicConversion.String)
218211
@GenerateNodeFactory
219-
public abstract static class IsNormalizedNode extends PythonBuiltinNode {
212+
public abstract static class IsNormalizedNode extends PythonBinaryClinicBuiltinNode {
220213
@TruffleBoundary
221214
protected Normalizer.Form getForm(String form) {
222215
try {
@@ -237,36 +230,25 @@ public boolean isNormalized(@SuppressWarnings("unused") String form, String unis
237230
return Normalizer.isNormalized(unistr, cachedNormForm);
238231
}
239232

240-
@Specialization(guards = {"form.equals(cachedForm)"}, limit = "4")
241-
public boolean normalize(String form, PString unistr,
242-
@Cached("form") String cachedForm,
243-
@Cached CastToJavaStringNode castToJavaStringNode,
244-
@Cached("getForm(cachedForm)") Normalizer.Form cachedNormForm) {
245-
try {
246-
return isNormalized(form, castToJavaStringNode.execute(unistr), cachedForm, cachedNormForm);
247-
} catch (CannotCastException e) {
248-
CompilerDirectives.transferToInterpreterAndInvalidate();
249-
throw new IllegalStateException("should not be reached");
250-
}
233+
@Override
234+
protected ArgumentClinicProvider getArgumentClinic() {
235+
return UnicodeDataModuleBuiltinsClinicProviders.IsNormalizedNodeClinicProviderGen.INSTANCE;
251236
}
252237
}
253238

254-
// unicodedata.name(char, defaultValue)
255-
@Builtin(name = "name", minNumOfPositionalArgs = 1, maxNumOfPositionalArgs = 2)
239+
// unicodedata.name(chr, default)
240+
@Builtin(name = "name", minNumOfPositionalArgs = 1, parameterNames = {"chr", "default"})
241+
@ArgumentClinic(name = "chr", conversion = ArgumentClinic.ClinicConversion.CodePoint, defaultValue = "-1")
256242
@GenerateNodeFactory
257-
public abstract static class NameNode extends PythonBuiltinNode {
243+
public abstract static class NameNode extends PythonBinaryClinicBuiltinNode {
258244

259-
@TruffleBoundary
260-
protected Object getName(String chr, Object defaultValue) {
261-
if (chr.codePointCount(0, chr.length()) != 1) {
262-
throw raise(TypeError, ErrorMessages.ARG_MUST_BE_UNICODE, "name()", 1, chr);
263-
}
264-
int cp = Character.codePointAt(chr, 0);
245+
@Specialization
246+
public Object name(int cp, Object defaultValue) {
265247
if ((0xe000 <= cp && cp <= 0xf8ff) || (0xF0000 <= cp && cp <= 0xFFFFD) || (0x100000 <= cp && cp <= 0x10FFFD)) {
266248
// do not populate names from private use areas
267249
throw raise(ValueError, ErrorMessages.NO_SUCH_NAME);
268250
}
269-
String result = UCharacter.getName(cp);
251+
String result = getName(cp);
270252
if (result == null) {
271253
if (defaultValue == PNone.NO_VALUE) {
272254
throw raise(ValueError, ErrorMessages.NO_SUCH_NAME);
@@ -276,26 +258,48 @@ protected Object getName(String chr, Object defaultValue) {
276258
return result;
277259
}
278260

261+
@TruffleBoundary
262+
private static String getName(int cp) {
263+
return UCharacter.getName(cp);
264+
}
265+
266+
@Override
267+
protected ArgumentClinicProvider getArgumentClinic() {
268+
return UnicodeDataModuleBuiltinsClinicProviders.NameNodeClinicProviderGen.INSTANCE;
269+
}
270+
}
271+
272+
// unicodedata.bidirectional(chr)
273+
@Builtin(name = "bidirectional", minNumOfPositionalArgs = 1, numOfPositionalOnlyArgs = 1, parameterNames = {"chr"})
274+
@ArgumentClinic(name = "chr", conversion = ArgumentClinic.ClinicConversion.CodePoint, defaultValue = "-1")
275+
@GenerateNodeFactory
276+
public abstract static class BidirectionalNode extends PythonUnaryClinicBuiltinNode {
279277
@Specialization
280-
public Object name(String chr, Object defaultValue) {
281-
return getName(chr, defaultValue);
278+
@TruffleBoundary
279+
static String bidirectional(int chr) {
280+
return UCharacter.getPropertyValueName(UProperty.BIDI_CLASS, UCharacter.getDirection(chr), UProperty.NameChoice.SHORT);
282281
}
283282

283+
@Override
284+
protected ArgumentClinicProvider getArgumentClinic() {
285+
return UnicodeDataModuleBuiltinsClinicProviders.BidirectionalNodeClinicProviderGen.INSTANCE;
286+
}
287+
}
288+
289+
// unicodedata.category(chr)
290+
@Builtin(name = "category", minNumOfPositionalArgs = 1, numOfPositionalOnlyArgs = 1, parameterNames = {"chr"})
291+
@ArgumentClinic(name = "chr", conversion = ArgumentClinic.ClinicConversion.CodePoint, defaultValue = "-1")
292+
@GenerateNodeFactory
293+
public abstract static class CategoryNode extends PythonUnaryClinicBuiltinNode {
284294
@Specialization
285-
public Object name(PString pchr, Object defaultValue,
286-
@Cached CastToJavaStringNode castToJavaStringNode) {
287-
String chr;
288-
try {
289-
chr = castToJavaStringNode.execute(pchr);
290-
} catch (CannotCastException e) {
291-
throw CompilerDirectives.shouldNotReachHere(e);
292-
}
293-
return getName(chr, defaultValue);
295+
@TruffleBoundary
296+
static String category(int chr) {
297+
return UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(chr), UProperty.NameChoice.SHORT);
294298
}
295299

296-
@Fallback
297-
public Object name(Object chr, @SuppressWarnings("unused") Object defaultValue) {
298-
throw raise(TypeError, ErrorMessages.ARG_MUST_BE_UNICODE, "name()", 1, chr);
300+
@Override
301+
protected ArgumentClinicProvider getArgumentClinic() {
302+
return UnicodeDataModuleBuiltinsClinicProviders.CategoryNodeClinicProviderGen.INSTANCE;
299303
}
300304
}
301305
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,16 @@ public static int charCount(int codePoint) {
281281
return Character.charCount(codePoint);
282282
}
283283

284+
@TruffleBoundary(allowInlining = true)
285+
public static boolean isHighSurrogate(char ch) {
286+
return Character.isHighSurrogate(ch);
287+
}
288+
289+
@TruffleBoundary(allowInlining = true)
290+
public static boolean isLowSurrogate(char ch) {
291+
return Character.isLowSurrogate(ch);
292+
}
293+
284294
@TruffleBoundary(allowInlining = true)
285295
public static int indexOf(String s, String sub, int fromIndex) {
286296
return s.indexOf(sub, fromIndex);

0 commit comments

Comments
 (0)