Skip to content

Commit 5a0bbb9

Browse files
authored
[stdlib] Implement native grapheme breaking for String (swiftlang#37864)
* Implement GraphemeWalker that does native grapheme breaking
* Bridged strings use native grapheme breaking for forward strides
* Implement bidirectional native grapheme breaking for native and foreign strings
* Remove ICU's grapheme breaking support
* Use UnicodeScalarView to implement GraphemeWalker
  use an Iterator approach
  remove Iterator conformance
* Incorporate Michael's feedback
  more comments addressed
  fix CRLF bug
* Try bringing back some old fast paths
* Parameterize nextBoundary and previousBoundary
  Parameterize nextBoundary and previousBoundary
* Implement Michael's suggestions
1 parent d682049 commit 5a0bbb9

File tree

16 files changed

+3639
-419
lines changed

16 files changed

+3639
-419
lines changed

stdlib/public/SwiftShims/UnicodeData.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ __swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
4040
const __swift_uint16_t * const *ranks,
4141
const __swift_uint16_t * const sizes);
4242

43+
SWIFT_RUNTIME_STDLIB_INTERNAL
44+
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar);
45+
4346
#ifdef __cplusplus
4447
} // extern "C"
4548
#endif

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -29,39 +29,6 @@
2929
extern "C" {
3030
#endif
3131

32-
SWIFT_RUNTIME_STDLIB_API
33-
const __swift_uint8_t *_swift_stdlib_GraphemeClusterBreakPropertyTrie;
34-
35-
struct _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadataTy {
36-
unsigned BMPFirstLevelIndexBits;
37-
unsigned BMPDataOffsetBits;
38-
unsigned SuppFirstLevelIndexBits;
39-
unsigned SuppSecondLevelIndexBits;
40-
unsigned SuppDataOffsetBits;
41-
42-
unsigned BMPLookupBytesPerEntry;
43-
unsigned BMPDataBytesPerEntry;
44-
unsigned SuppLookup1BytesPerEntry;
45-
unsigned SuppLookup2BytesPerEntry;
46-
unsigned SuppDataBytesPerEntry;
47-
48-
unsigned TrieSize;
49-
50-
unsigned BMPLookupBytesOffset;
51-
unsigned BMPDataBytesOffset;
52-
unsigned SuppLookup1BytesOffset;
53-
unsigned SuppLookup2BytesOffset;
54-
unsigned SuppDataBytesOffset;
55-
};
56-
57-
SWIFT_RUNTIME_STDLIB_API
58-
const struct _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadataTy
59-
_swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata;
60-
61-
SWIFT_RUNTIME_STDLIB_API
62-
const __swift_uint16_t *
63-
_swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix;
64-
6532
SWIFT_RUNTIME_STDLIB_API
6633
__swift_int32_t _swift_stdlib_unicode_strToUpper(
6734
__swift_uint16_t *Destination, __swift_int32_t DestinationCapacity,
@@ -362,17 +329,6 @@ typedef enum __swift_stdlib_UErrorCode {
362329
__swift_stdlib_U_ERROR_LIMIT = __swift_stdlib_U_PLUGIN_ERROR_LIMIT
363330
} __swift_stdlib_UErrorCode;
364331

365-
typedef enum __swift_stdlib_UBreakIteratorType {
366-
__swift_stdlib_UBRK_CHARACTER = 0,
367-
__swift_stdlib_UBRK_WORD = 1,
368-
__swift_stdlib_UBRK_LINE = 2,
369-
__swift_stdlib_UBRK_SENTENCE = 3,
370-
#ifndef U_HIDE_DEPRECATED_API
371-
__swift_stdlib_UBRK_TITLE = 4,
372-
#endif
373-
__swift_stdlib_UBRK_COUNT = 5
374-
} __swift_stdlib_UBreakIteratorType;
375-
376332
typedef enum __swift_stdlib_UCharCategory {
377333
__swift_stdlib_U_UNASSIGNED = 0,
378334
__swift_stdlib_U_GENERAL_OTHER_TYPES = 0,
@@ -447,47 +403,6 @@ typedef __swift_uint16_t __swift_stdlib_UChar;
447403
typedef __swift_uint8_t
448404
__swift_stdlib_UVersionInfo[__SWIFT_STDLIB_U_MAX_VERSION_LENGTH];
449405

450-
SWIFT_RUNTIME_STDLIB_API
451-
void __swift_stdlib_ubrk_close(__swift_stdlib_UBreakIterator *bi);
452-
453-
SWIFT_RUNTIME_STDLIB_API
454-
__swift_stdlib_UBreakIterator *
455-
__swift_stdlib_ubrk_open(__swift_stdlib_UBreakIteratorType type,
456-
const char *_Nullable locale,
457-
const __swift_stdlib_UChar *_Null_unspecified text,
458-
__swift_int32_t textLength,
459-
__swift_stdlib_UErrorCode *status);
460-
461-
SWIFT_RUNTIME_STDLIB_API
462-
void __swift_stdlib_ubrk_setUText(__swift_stdlib_UBreakIterator *bi,
463-
__swift_stdlib_UText *text,
464-
__swift_stdlib_UErrorCode *status);
465-
466-
SWIFT_RUNTIME_STDLIB_API
467-
void __swift_stdlib_ubrk_setText(__swift_stdlib_UBreakIterator *bi,
468-
const __swift_stdlib_UChar *text,
469-
__swift_int32_t textLength,
470-
__swift_stdlib_UErrorCode *status);
471-
472-
SWIFT_RUNTIME_STDLIB_API
473-
__swift_stdlib_UText *
474-
__swift_stdlib_utext_openUTF8(__swift_stdlib_UText *_Nullable, const char *,
475-
__swift_int64_t, __swift_stdlib_UErrorCode *);
476-
477-
SWIFT_RUNTIME_STDLIB_API
478-
__swift_stdlib_UText *
479-
__swift_stdlib_utext_openUChars(__swift_stdlib_UText *_Nullable,
480-
const __swift_stdlib_UChar *, __swift_int64_t,
481-
__swift_stdlib_UErrorCode *);
482-
483-
SWIFT_RUNTIME_STDLIB_API
484-
__swift_int32_t __swift_stdlib_ubrk_preceding(__swift_stdlib_UBreakIterator *bi,
485-
__swift_int32_t offset);
486-
487-
SWIFT_RUNTIME_STDLIB_API
488-
__swift_int32_t __swift_stdlib_ubrk_following(__swift_stdlib_UBreakIterator *bi,
489-
__swift_int32_t offset);
490-
491406
SWIFT_RUNTIME_STDLIB_API
492407
__swift_stdlib_UBool
493408
__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32,

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ set(SWIFTLIB_ESSENTIAL
177177
UnavailableStringAPIs.swift
178178
UnicodeData.swift
179179
UnicodeEncoding.swift
180+
UnicodeGraphemeBreakProperty.swift
180181
UnicodeHelpers.swift
181182
UnicodeParser.swift
182183
UnicodeScalarProperties.swift

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
"Unicode.swift",
4646
"UnicodeData.swift",
4747
"UnicodeEncoding.swift",
48+
"UnicodeGraphemeBreakProperty.swift",
4849
"UnicodeHelpers.swift",
4950
"UnicodeParser.swift",
5051
"UnicodeScalar.swift",

0 commit comments

Comments
 (0)