Skip to content

Commit 3664eca

Browse files
committed
Generate Unicode normalization data for SwiftShims
1 parent 9b25a2d commit 3664eca

File tree

8 files changed

+2472
-297
lines changed

8 files changed

+2472
-297
lines changed

stdlib/public/SwiftShims/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ set(sources
1919
System.h
2020
Target.h
2121
ThreadLocalStorage.h
22+
UnicodeData.h
2223
UnicodeShims.h
2324
Visibility.h
2425
_SwiftConcurrency.h
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef SWIFT_STDLIB_SHIMS_UNICODEDATA_H
14+
#define SWIFT_STDLIB_SHIMS_UNICODEDATA_H
15+
16+
#include "SwiftStdint.h"
17+
#include "Visibility.h"
18+
19+
#ifdef __cplusplus
20+
extern "C" {
21+
#endif
22+
23+
// NOTE(review): defined outside this header; judging by the name this returns
// packed normalization data for `scalar` -- confirm the bit layout against the
// definition.
SWIFT_RUNTIME_STDLIB_INTERNAL
24+
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar);
25+
26+
// NOTE(review): declared without an explicit `extern`; this relies on
// SWIFT_RUNTIME_STDLIB_INTERNAL (Visibility.h) to supply the linkage --
// confirm this is a declaration and not a per-includer definition.
SWIFT_RUNTIME_STDLIB_INTERNAL
27+
const __swift_uint8_t * const _swift_stdlib_nfd_decompositions;
28+
29+
// NOTE(review): defined outside this header; presumably the returned entry
// locates `scalar`'s decomposition inside _swift_stdlib_nfd_decompositions --
// confirm against the definition.
SWIFT_RUNTIME_STDLIB_INTERNAL
30+
__swift_uint32_t _swift_stdlib_getDecompositionEntry(__swift_uint32_t scalar);
31+
32+
// NOTE(review): defined outside this header; presumably returns the scalar
// that `x` followed by `y` composes to -- confirm, including the value used
// when the pair does not compose.
SWIFT_RUNTIME_STDLIB_INTERNAL
33+
__swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
34+
__swift_uint32_t y);
35+
36+
// Hashed bit-array lookup, defined in stubs/UnicodeData.cpp.
//
// Visits the bit arrays keys[0] ... keys[levels - 1] in order. For each one,
// `scalar` is hashed to a bit position below sizes[i]; at the first level
// where that bit is set, the result is ranks[i][bit / 512] plus the number of
// set bits that precede the position inside its 512-bit group.
//
// Returns 0 when no level has the bit set, so callers cannot distinguish a
// miss from a hit whose computed value is 0.
SWIFT_RUNTIME_STDLIB_INTERNAL
37+
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
38+
__swift_intptr_t levels,
39+
const __swift_uint64_t * const *keys,
40+
const __swift_uint16_t * const *ranks,
41+
const __swift_uint16_t * const sizes);
42+
43+
#ifdef __cplusplus
44+
} // extern "C"
45+
#endif
46+
47+
#endif // SWIFT_STDLIB_SHIMS_UNICODEDATA_H

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,6 @@ typedef enum __swift_stdlib_UNumericType {
432432

433433
typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator;
434434
typedef struct __swift_stdlib_UText __swift_stdlib_UText;
435-
typedef struct __swift_stdlib_UNormalizer2 __swift_stdlib_UNormalizer2;
436435
typedef __swift_int8_t __swift_stdlib_UBool;
437436
typedef __swift_int32_t __swift_stdlib_UChar32;
438437
#if defined(__APPLE__)
@@ -489,27 +488,6 @@ SWIFT_RUNTIME_STDLIB_API
489488
__swift_int32_t __swift_stdlib_ubrk_following(__swift_stdlib_UBreakIterator *bi,
490489
__swift_int32_t offset);
491490

492-
SWIFT_RUNTIME_STDLIB_API
493-
__swift_stdlib_UBool
494-
__swift_stdlib_unorm2_hasBoundaryBefore(const __swift_stdlib_UNormalizer2 *,
495-
__swift_stdlib_UChar32);
496-
497-
SWIFT_RUNTIME_STDLIB_API
498-
const __swift_stdlib_UNormalizer2 *
499-
__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *);
500-
501-
SWIFT_RUNTIME_STDLIB_API
502-
__swift_int32_t
503-
__swift_stdlib_unorm2_normalize(const __swift_stdlib_UNormalizer2 *,
504-
const __swift_stdlib_UChar *, __swift_int32_t,
505-
__swift_stdlib_UChar *, __swift_int32_t,
506-
__swift_stdlib_UErrorCode *);
507-
508-
SWIFT_RUNTIME_STDLIB_API
509-
__swift_int32_t __swift_stdlib_unorm2_spanQuickCheckYes(
510-
const __swift_stdlib_UNormalizer2 *, const __swift_stdlib_UChar *,
511-
__swift_int32_t, __swift_stdlib_UErrorCode *);
512-
513491
SWIFT_RUNTIME_STDLIB_API
514492
__swift_stdlib_UBool
515493
__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32,

stdlib/public/SwiftShims/module.modulemap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ module SwiftShims {
1717
header "SwiftStdint.h"
1818
header "System.h"
1919
header "ThreadLocalStorage.h"
20+
header "UnicodeData.h"
2021
header "UnicodeShims.h"
2122
header "Visibility.h"
2223
export *

stdlib/public/stubs/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ set(swift_stubs_sources
77
Stubs.cpp
88
ThreadLocalStorage.cpp
99
MathStubs.cpp
10+
UnicodeData.cpp
11+
UnicodeNormalization.cpp
1012
)
1113
set(swift_stubs_objc_sources
1214
Availability.mm
@@ -18,7 +20,7 @@ set(swift_stubs_objc_sources
1820
set(swift_stubs_gyb_sources
1921
SwiftNativeNSXXXBase.mm.gyb)
2022
set(swift_stubs_unicode_normalization_sources
21-
UnicodeNormalization.cpp)
23+
UnicodeShims.cpp)
2224
set(LLVM_OPTIONAL_SOURCES
2325
${swift_stubs_objc_sources}
2426
${swift_stubs_unicode_normalization_sources}

stdlib/public/stubs/UnicodeData.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "../SwiftShims/UnicodeData.h"
14+
15+
// Per-block mixing step used by hash(): multiply, rotate left by 15 bits,
// multiply again. All arithmetic wraps modulo 2^32.
static inline __swift_uint32_t scramble(__swift_uint32_t scalar) {
  __swift_uint32_t mixed = scalar * 0xCC9E2D51;
  mixed = (mixed << 15) | (mixed >> 17);
  return mixed * 0x1B873593;
}
21+
22+
// This is a reimplementation of MurMur3 hash with a modulo at the end.
23+
static __swift_uint32_t hash(__swift_uint32_t scalar, __swift_uint32_t level,
24+
__swift_uint32_t seed) {
25+
__swift_uint32_t hash = seed;
26+
27+
hash ^= scramble(scalar);
28+
hash = (hash << 13) | (hash >> 19);
29+
hash = hash * 5 + 0xE6546B64;
30+
31+
hash ^= scramble(level);
32+
hash = (hash << 13) | (hash >> 19);
33+
hash = hash * 5 + 0xE6546B64;
34+
35+
hash ^= 8;
36+
hash ^= hash >> 16;
37+
hash *= 0x85EBCA6B;
38+
hash ^= hash >> 13;
39+
hash *= 0xC2B2AE35;
40+
hash ^= hash >> 16;
41+
42+
return hash % level;
43+
}
44+
45+
// Looks up `scalar` in a multi-level hashed bit-array table and returns the
// index computed for it.
//
// Parameters:
//   scalar - the value to look up.
//   levels - number of entries in `keys`, `ranks` and `sizes`. Values <= 0
//            yield 0.
//   keys   - keys[i] is the bit array for level i, stored as 64-bit words.
//   ranks  - ranks[i][g] is the stored base count for the g-th 512-bit
//            (8-word) group of level i.
//   sizes  - sizes[i] is the modulus used when hashing into level i, i.e. the
//            number of addressable bits in keys[i].
//
// Levels are probed in order. At the first level whose hashed bit is set, the
// result is that group's base count plus the number of set bits that precede
// the hashed bit inside the group.
//
// Returns 0 when no level contains the bit; note that 0 can also be a
// legitimate result, so a miss is not distinguishable here.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
                                         __swift_intptr_t levels,
                                         const __swift_uint64_t * const *keys,
                                         const __swift_uint16_t * const *ranks,
                                         const __swift_uint16_t * const sizes) {
  // Index with the same type as `levels` and compare with `<` so that a
  // negative or oversized count can neither spin nor truncate.
  for (__swift_intptr_t level = 0; level < levels; level += 1) {
    // A level with no bits holds no keys; skipping it also avoids the modulo
    // by zero that hash() would otherwise perform.
    if (sizes[level] == 0) {
      continue;
    }

    const __swift_uint64_t *bitArray = keys[level];

    // The level number doubles as the hash seed.
    const __swift_uint64_t bit =
        hash(scalar, sizes[level], (__swift_uint32_t) level);
    const __swift_uint64_t wordIdx = bit / 64;
    const __swift_uint64_t bitInWord = bit % 64;
    const __swift_uint64_t word = bitArray[wordIdx];

    if ((word & ((__swift_uint64_t) 1 << bitInWord)) == 0) {
      continue;
    }

    // Start from the stored count for this 512-bit group...
    __swift_uint16_t rank = ranks[level][bit / 512];

    // ...add the set bits of the whole words that precede ours in the group
    // (groups start at word indices that are multiples of 8)...
    for (__swift_uint64_t w = wordIdx & ~(__swift_uint64_t) 7; w != wordIdx;
         w += 1) {
      rank += __builtin_popcountll(bitArray[w]);
    }

    // ...and the set bits below ours inside our own word. Shifting left by
    // (64 - bitInWord) discards our bit and everything above it; the guard
    // keeps the shift amount in 1...63.
    if (bitInWord > 0) {
      rank += __builtin_popcountll(word << (64 - bitInWord));
    }

    return rank;
  }

  return 0;
}

0 commit comments

Comments
 (0)