Skip to content

Commit 7e1ce8a

Browse files
committed
raise when __doc__ is not representable as utf-8
1 parent 6214e7f commit 7e1ce8a

File tree

3 files changed

+27
-0
lines changed

3 files changed

+27
-0
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_builtin.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,5 @@
5050
*graalpython.lib-python.3.test.test_builtin.TestBreakpoint.test_envar_unimportable
5151
*graalpython.lib-python.3.test.test_builtin.TestSorted.test_basic
5252
*graalpython.lib-python.3.test.test_builtin.TestSorted.test_inputtypes
53+
*graalpython.lib-python.3.test.test_builtin.TestType.test_type_doc
5354
*graalpython.lib-python.3.test.test_builtin.TestType.test_type_nokwargs

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinConstructors.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@
8484
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
8585

8686
import java.math.BigInteger;
87+
import java.nio.charset.Charset;
88+
import java.nio.charset.CodingErrorAction;
89+
import java.nio.charset.StandardCharsets;
8790
import java.util.Arrays;
8891
import java.util.List;
8992
import java.util.Locale;
@@ -160,6 +163,7 @@
160163
import com.oracle.graal.python.nodes.BuiltinNames;
161164
import com.oracle.graal.python.nodes.ErrorMessages;
162165
import com.oracle.graal.python.nodes.PGuards;
166+
import com.oracle.graal.python.nodes.SpecialAttributeNames;
163167
import com.oracle.graal.python.nodes.SpecialMethodNames;
164168
import com.oracle.graal.python.nodes.attributes.GetAttributeNode;
165169
import com.oracle.graal.python.nodes.attributes.GetAttributeNode.GetAnyAttributeNode;
@@ -2309,6 +2313,22 @@ private PythonClass typeMetaclass(VirtualFrame frame, String name, PTuple bases,
23092313
} else {
23102314
pythonClass.setAttribute(key, value);
23112315
}
2316+
} else if (SpecialAttributeNames.__DOC__.equals(key)) {
2317+
// CPython sets tp_doc to a copy of dict['__doc__'], if that is a string. It
2318+
// forcibly encodes the string as UTF-8, and raises an error if that is not
2319+
// possible.
2320+
String doc = null;
2321+
if (value instanceof String) {
2322+
doc = (String) value;
2323+
} else if (value instanceof PString) {
2324+
doc = ((PString) value).getValue();
2325+
}
2326+
if (doc != null) {
2327+
if (!canEncode(doc)) {
2328+
throw raise(PythonBuiltinClassType.UnicodeEncodeError, ErrorMessages.CANNOT_ENCODE_DOCSTR, doc);
2329+
}
2330+
}
2331+
pythonClass.setAttribute(key, value);
23122332
} else {
23132333
pythonClass.setAttribute(key, value);
23142334
}
@@ -2379,6 +2399,11 @@ private PythonClass typeMetaclass(VirtualFrame frame, String name, PTuple bases,
23792399
return pythonClass;
23802400
}
23812401

2402+
/**
 * Reports whether {@code doc} can be encoded as UTF-8.
 *
 * A new UTF-8 encoder is created on every call and asked whether it can encode the whole
 * string. typeMetaclass calls this for a string-valued __doc__ entry and raises
 * UnicodeEncodeError (ErrorMessages.CANNOT_ENCODE_DOCSTR) when the result is false.
 *
 * NOTE(review): presumably the rejected inputs are strings containing unpaired surrogates;
 * confirm against the CharsetEncoder.canEncode documentation.
 * NOTE(review): the @TruffleBoundary annotation presumably keeps the JDK charset code out of
 * compiled code; confirm.
 *
 * @param doc the docstring text to check; the visible caller only passes non-null values
 * @return true if the UTF-8 encoder reports the string as encodable, false otherwise
 */
@TruffleBoundary
2403+
private static boolean canEncode(String doc) {
2404+
return StandardCharsets.UTF_8.newEncoder().canEncode(doc);
2405+
}
2406+
23822407
@TruffleBoundary
23832408
private PTuple copySlots(String className, SequenceStorage slotList, int slotlen, boolean add_dict, boolean add_weak, PDict namespace, HashingStorageLibrary nslib) {
23842409
SequenceStorage newSlots = new ObjectSequenceStorage(slotlen - PInt.intValue(add_dict) - PInt.intValue(add_weak));

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,4 +517,5 @@ public abstract class ErrorMessages {
517517
public static final String S_ALIGNMENT_FLAG_NOT_ALLOWED_FOR_COMPLEX_FMT = "'%c' alignment flag is not allowed in complex format specifier";
518518
public static final String ZERO_PADDING_NOT_ALLOWED_FOR_COMPLEX_FMT = "Zero padding is not allowed in complex format specifier";
519519
public static final String POW_THIRD_ARG_CANNOT_BE_ZERO = "pow() 3rd argument cannot be 0";
520+
public static final String CANNOT_ENCODE_DOCSTR = "'utf-8' codec can't encode docstring '%s'";
520521
}

0 commit comments

Comments
 (0)