@@ -1448,6 +1448,36 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
1448
1448
}
1449
1449
1450
1450
1451
+ static int
1452
+ _parse_u_plus (const char * codepoint , int namelen , Py_UCS4 * code )
1453
+ {
1454
+ const int hex_len = namelen - 2 ;
1455
+ if (codepoint [0 ] != 'U' || codepoint [1 ] != '+' ) {
1456
+ return 0 ;
1457
+ }
1458
+ if (hex_len < 4 || hex_len > 6 ) {
1459
+ PyErr_SetString (PyExc_ValueError , "invalid codepoint notation length" );
1460
+ return -1 ;
1461
+ }
1462
+
1463
+ char buf [7 ];
1464
+ memcpy (buf , codepoint + 2 , hex_len );
1465
+ buf [hex_len ] = '\0' ;
1466
+
1467
+ char * endptr = NULL ;
1468
+ const unsigned long v = strtoul (buf , & endptr , 16 );
1469
+
1470
+ if (* endptr != '\0' || v > 0x10ffff ) {
1471
+ PyErr_Format (PyExc_ValueError ,
1472
+ "invalid codepoint notation '%.*s'" , namelen , codepoint );
1473
+ return -1 ;
1474
+ }
1475
+
1476
+ * code = (Py_UCS4 )v ;
1477
+ return 1 ;
1478
+ }
1479
+
1480
+
1451
1481
static int
1452
1482
capi_getcode (const char * name , int namelen , Py_UCS4 * code ,
1453
1483
int with_named_seq )
@@ -1458,6 +1488,12 @@ capi_getcode(const char* name, int namelen, Py_UCS4* code,
1458
1488
return _check_alias_and_seq (code , with_named_seq );
1459
1489
}
1460
1490
1491
+ static int
1492
+ capi_parse_u_plus (const char * name , int namelen , Py_UCS4 * code )
1493
+ {
1494
+ return _parse_u_plus (name , namelen , code );
1495
+ }
1496
+
1461
1497
static void
1462
1498
unicodedata_destroy_capi (PyObject * capsule )
1463
1499
{
@@ -1475,6 +1511,7 @@ unicodedata_create_capi(void)
1475
1511
}
1476
1512
capi -> getname = capi_getucname ;
1477
1513
capi -> getcode = capi_getcode ;
1514
+ capi -> parse_u_plus = capi_parse_u_plus ;
1478
1515
1479
1516
PyObject * capsule = PyCapsule_New (capi ,
1480
1517
PyUnicodeData_CAPSULE_NAME ,
@@ -1543,6 +1580,16 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
1543
1580
{
1544
1581
Py_UCS4 code ;
1545
1582
unsigned int index ;
1583
+
1584
+ const int check = _parse_u_plus (name , (int )name_length , & code );
1585
+ if (check == 1 ) {
1586
+ return PyUnicode_FromOrdinal (code );
1587
+ }
1588
+ if (check == -1 ) {
1589
+ /* Error set in _parse_u_plus */
1590
+ return NULL ;
1591
+ }
1592
+
1546
1593
if (name_length > NAME_MAXLEN ) {
1547
1594
PyErr_SetString (PyExc_KeyError , "name too long" );
1548
1595
return NULL ;
0 commit comments