python · benjaminp · Sep 11, 2025 · Sep 9, 2025 · StanFromIreland · Sep 11, 2025
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
@@ -1843,9 +1843,9 @@ expression support in the :mod:`re` module).
    lowercase, :meth:`lower` would do nothing to ``'ß'``; :meth:`casefold`
    converts it to ``"ss"``.
 
-   The casefolding algorithm is
-   `described in section 3.13 'Default Case Folding' of the Unicode Standard
-   <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__.
+   The casefolding algorithm is `described in section 3.13.3 'Default Case
+   Folding' of the Unicode Standard
+   <https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G53253>`__.
 
    .. versionadded:: 3.3
 
@@ -2056,7 +2056,7 @@ expression support in the :mod:`re` module).
    property being one of "Lm", "Lt", "Lu", "Ll", or "Lo".  Note that this is different
    from the `Alphabetic property defined in the section 4.10 'Letters, Alphabetic, and
    Ideographic' of the Unicode Standard
-   <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-4/#G91002>`_.
+   <https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-4/#G91002>`__.
 
 
 .. method:: str.isascii()
@@ -2196,9 +2196,9 @@ expression support in the :mod:`re` module).
    Return a copy of the string with all the cased characters [4]_ converted to
    lowercase.
 
-   The lowercasing algorithm used is
-   `described in section 3.13 'Default Case Folding' of the Unicode Standard
-   <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__.
+   The lowercasing algorithm used is `described in section 3.13.2 'Default Case
+   Conversion' of the Unicode Standard
+   <https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G34078>`__.
 
 
 .. method:: str.lstrip(chars=None, /)
@@ -2561,9 +2561,9 @@ expression support in the :mod:`re` module).
    character(s) is not "Lu" (Letter, uppercase), but e.g. "Lt" (Letter,
    titlecase).
 
-   The uppercasing algorithm used is
-   `described in section 3.13 'Default Case Folding' of the Unicode Standard
-   <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__.
+   The uppercasing algorithm used is `described in section 3.13.2 'Default Case
+   Conversion' of the Unicode Standard
+   <https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G34078>`__.
 
 
 .. method:: str.zfill(width, /)

diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst
@@ -17,8 +17,8 @@
 
 This module provides access to the Unicode Character Database (UCD) which
 defines character properties for all Unicode characters. The data contained in
-this database is compiled from the `UCD version 16.0.0
-<https://www.unicode.org/Public/16.0.0/ucd>`_.
+this database is compiled from the `UCD version 17.0.0
+<https://www.unicode.org/Public/17.0.0/ucd>`_.
 
 The module uses the same names and symbols as defined by Unicode
 Standard Annex #44, `"Unicode Character Database"
@@ -211,6 +211,6 @@ In addition, the module exposes the following constant:
 
 .. rubric:: Footnotes
 
-.. [#] https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt
+.. [#] https://www.unicode.org/Public/17.0.0/ucd/NameAliases.txt
 
-.. [#] https://www.unicode.org/Public/16.0.0/ucd/NamedSequences.txt
+.. [#] https://www.unicode.org/Public/17.0.0/ucd/NamedSequences.txt
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
@@ -384,8 +384,8 @@ Character Database.
 
 
 .. _UAX-31: https://www.unicode.org/reports/tr31/
-.. _PropList.txt: https://www.unicode.org/Public/16.0.0/ucd/PropList.txt
-.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt
+.. _PropList.txt: https://www.unicode.org/Public/17.0.0/ucd/PropList.txt
+.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt
-.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms
+.. _normalization form: https://www.unicode.org/reports/tr15/tr15-57.html#Norm_Forms
 
 
@@ -793,7 +793,7 @@ with the given *name*::
 This sequence cannot appear in :ref:`bytes literals <bytes-literal>`.
 
 .. versionchanged:: 3.3
-   Support for `name aliases <https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt>`__
+   Support for `name aliases <https://www.unicode.org/Public/17.0.0/ucd/NameAliases.txt>`__
    has been added.
 
 .. _string-escape-long-hex:

diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
@@ -648,6 +648,12 @@ typing
   (Contributed by Nikita Sobolev in :gh:`137191`.)
 
 
+unicodedata
+-----------
+
+* The Unicode database has been updated to Unicode 17.0.0.
+
+
 wave
 ----
 

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
@@ -24,7 +24,7 @@
 class UnicodeMethodsTest(unittest.TestCase):
 
     # update this, if the database changes
-    expectedchecksum = '9e43ee3929471739680c0e705482b4ae1c4122e4'
+    expectedchecksum = '8b2615a9fc627676cbc0b6fac0191177df97ef5f'
 
     @requires_resource('cpu')
     def test_method_checksum(self):
@@ -77,7 +77,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
 
     # Update this if the database changes. Make sure to do a full rebuild
     # (e.g. 'make distclean && make') to get the correct checksum.
-    expectedchecksum = '23ab09ed4abdf93db23b97359108ed630dd8311d'
+    expectedchecksum = '65670ae03a324c5f9e826a4de3e25bae4d73c9b7'
 
     @requires_resource('cpu')
     def test_function_checksum(self):

diff --git a/Misc/NEWS.d/next/Library/2025-09-09-10-48-26.gh-issue-138706.xB--LX.rst b/Misc/NEWS.d/next/Library/2025-09-09-10-48-26.gh-issue-138706.xB--LX.rst
@@ -0,0 +1 @@
+Update :mod:`unicodedata` database to Unicode 17.0.0.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
@@ -1020,13 +1020,14 @@ is_unified_ideograph(Py_UCS4 code)
         (0x3400 <= code && code <= 0x4DBF)   || /* CJK Ideograph Extension A */
         (0x4E00 <= code && code <= 0x9FFF)   || /* CJK Ideograph */
         (0x20000 <= code && code <= 0x2A6DF) || /* CJK Ideograph Extension B */
-        (0x2A700 <= code && code <= 0x2B739) || /* CJK Ideograph Extension C */
+        (0x2A700 <= code && code <= 0x2B73F) || /* CJK Ideograph Extension C */
         (0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
-        (0x2B820 <= code && code <= 0x2CEA1) || /* CJK Ideograph Extension E */
+        (0x2B820 <= code && code <= 0x2CEAD) || /* CJK Ideograph Extension E */
         (0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
         (0x2EBF0 <= code && code <= 0x2EE5D) || /* CJK Ideograph Extension I */
         (0x30000 <= code && code <= 0x3134A) || /* CJK Ideograph Extension G */
-        (0x31350 <= code && code <= 0x323AF);   /* CJK Ideograph Extension H */
+        (0x31350 <= code && code <= 0x323AF) || /* CJK Ideograph Extension H */
+        (0x323B0 <= code && code <= 0x33479);   /* CJK Ideograph Extension J */
 }
 
 /* macros used to determine if the given code point is in the PUA range that
-Original file line number
+Diff line change
@@ Expand Up / @@ -648,6 +648,12 @@ typing @@
       (Contributed by Nikita Sobolev in :gh:`137191`.)
+    unicodedata
+    -----------
+    * The Unicode database has been updated to Unicode 17.0.0.
     wave
     ----
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Update :mod:`unicodedata` database to Unicode 17.0.0.