Skip to content

Commit a89eb7d

Browse files
authored
1 parent 0c1de71 commit a89eb7d

File tree

538 files changed

+978355
-891773
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

538 files changed

+978355
-891773
lines changed

Package.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ var buildSettings: [CXXSetting] = [
2121
.define("U_COMMON_IMPLEMENTATION"),
2222
// Where data are stored
2323
.define("ICU_DATA_DIR", to: "\"/usr/share/icu/\""),
24-
.define("U_TIMEZONE_FILES_DIR", to: "\"/var/db/timezone/icutz\""),
25-
.define("USE_PACKAGE_DATA", to: "1")
24+
.define("USE_PACKAGE_DATA", to: "1"),
25+
.define("APPLE_ICU_CHANGES", to: "1")
2626
]
2727

2828
#if os(Windows)

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,13 @@ This version of the [ICU4C](https://icu.unicode.org/) project contains customize
77

88
## Versioning
99

10-
FoundationICU follows the same version number as the upstream ICU4C project that it contains. The oldest version that this package supports is `ICU 70.1`.
10+
See the following version matrix:
11+
12+
| `FoundationICU` version | `ICU` version |
13+
| --- | --- |
14+
| `0.0.2` and below | `70.1` |
15+
| `0.0.3` and above | `72.1` |
16+
1117

1218
## Adding FoundationICU as a Dependency
1319

@@ -16,7 +22,7 @@ FoundationICU follows the same version number as the upstream ICU4C project that
1622
To use the `FoundationICU` library in a SwiftPM project, add the following lines to the dependencies in your `Package.swift` file:
1723

1824
```swift
19-
.package(url: "https://github.com/apple/swift-foundation-icu", from: "70.1"),
25+
.package(url: "https://github.com/apple/swift-foundation-icu", from: "0.0.3"),
2026
```
2127

2228
Include `"FoundationICU"` as a dependency for your target:
@@ -41,4 +47,4 @@ extension UCalendarAttribute {
4147

4248
## Future Improvements
4349

44-
- **Data file handling**: currently, a pre-built data file is checked in as a binary file. In the future, we would like to check in the source files instead and build the data as a shared library to avoid the need to maintain and load a separate data file.
50+
- **Data file handling**: currently, the data file is embedded in the binary itself as `[uint8_t]` (see `icu_packaged_data.h`). In the future, we would like to check in the source files instead and build the data as a shared library to avoid the need to maintain and load a separate data file.

icuSources/common/aaplbfct.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
/**
22
*******************************************************************************
33
* Copyright (C) 2007,2012 International Business Machines Corporation, Apple Inc.,*
4-
* and others. All Rights Reserved. *
4+
* and others. All Rights Reserved.
5+
* *
6+
* originally added per rdar://4448220 Add user dictionary support
57
*******************************************************************************
68
*/
79

@@ -21,6 +23,7 @@
2123
#include <unistd.h>
2224
#include <glob.h>
2325
#include <strings.h>
26+
#include <NSSystemDirectories.h>
2427
#include <sys/types.h>
2528
#include <sys/stat.h>
2629
#include <sys/mman.h>

icuSources/common/aaplbfct.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
/**
22
************************************************************************************
33
* Copyright (C) 2006-2007,2012 International Business Machines Corporation and others. *
4-
* All Rights Reserved. *
4+
* All Rights Reserved.
5+
*
6+
* originally added per rdar://4448220 Add user dictionary support *
57
************************************************************************************
68
*/
79

icuSources/common/appendable.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,23 @@ Appendable::appendString(const UChar *s, int32_t length) {
3737
UChar c;
3838
while((c=*s++)!=0) {
3939
if(!appendCodeUnit(c)) {
40-
return FALSE;
40+
return false;
4141
}
4242
}
4343
} else if(length>0) {
4444
const UChar *limit=s+length;
4545
do {
4646
if(!appendCodeUnit(*s++)) {
47-
return FALSE;
47+
return false;
4848
}
4949
} while(s<limit);
5050
}
51-
return TRUE;
51+
return true;
5252
}
5353

5454
UBool
5555
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
56-
return TRUE;
56+
return true;
5757
}
5858

5959
UChar *

icuSources/common/bmpset.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,9 @@ BMPSet::contains(UChar32 c) const {
309309
// surrogate or supplementary code point
310310
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
311311
} else {
312-
// Out-of-range code points get FALSE, consistent with long-standing
312+
// Out-of-range code points get false, consistent with long-standing
313313
// behavior of UnicodeSet::contains(c).
314-
return FALSE;
314+
return false;
315315
}
316316
}
317317

icuSources/common/brkeng.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ UnhandledEngine::findBreaks( UText *text,
7979
int32_t /* startPos */,
8080
int32_t endPos,
8181
UVector32 &/*foundBreaks*/,
82+
UBool /* isPhraseBreaking */,
8283
UErrorCode &status) const {
8384
if (U_FAILURE(status)) return 0;
8485
UChar32 c = utext_current32(text);
@@ -260,10 +261,10 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
260261
const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
261262
if (extStart != NULL) {
262263
int32_t len = (int32_t)(extStart - dictfname);
263-
ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
264+
ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
264265
dictnlength = len;
265266
}
266-
dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
267+
dictnbuf.appendInvariantChars(UnicodeString(false, dictfname, dictnlength), status);
267268
ures_close(b);
268269

269270
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);

icuSources/common/brkeng.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class LanguageBreakEngine : public UMemory {
7575
int32_t startPos,
7676
int32_t endPos,
7777
UVector32 &foundBreaks,
78+
UBool isPhraseBreaking,
7879
UErrorCode &status) const = 0;
7980

8081
};
@@ -194,6 +195,7 @@ class UnhandledEngine : public LanguageBreakEngine {
194195
int32_t startPos,
195196
int32_t endPos,
196197
UVector32 &foundBreaks,
198+
UBool isPhraseBreaking,
197199
UErrorCode &status) const override;
198200

199201
/**

icuSources/common/brkiter.cpp

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "unicode/ures.h"
3131
#include "unicode/ustring.h"
3232
#include "unicode/filteredbrk.h"
33+
#include "bytesinkutil.h"
3334
#include "ucln_cmn.h"
3435
#include "cstring.h"
3536
#include "umutex.h"
@@ -115,7 +116,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
115116
}
116117

117118
// Create a RuleBasedBreakIterator
118-
result = new RuleBasedBreakIterator(file, status);
119+
result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);
119120

120121
// If there is a result, set the valid locale and actual locale, and the kind
121122
if (U_SUCCESS(status) && result != NULL) {
@@ -200,7 +201,10 @@ BreakIterator::getAvailableLocales(int32_t& count)
200201
//-------------------------------------------
201202

202203
BreakIterator::BreakIterator()
204+
#if APPLE_ICU_CHANGES
205+
// rdar://36667210 Add ubrk_setLineWordOpts to programmatically set @lw options, add lw=keep-hangul support via keyword or function
203206
: fLineWordOpts(UBRK_LINEWORD_NORMAL)
207+
#endif // APPLE_ICU_CHANGES
204208
{
205209
*validLocale = *actualLocale = 0;
206210
}
@@ -279,7 +283,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
279283
// defined in ucln_cmn.h
280284
U_NAMESPACE_END
281285

282-
static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER;
286+
static icu::UInitOnce gInitOnceBrkiter {};
283287
static icu::ICULocaleService* gService = NULL;
284288

285289

@@ -296,7 +300,7 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
296300
}
297301
gInitOnceBrkiter.reset();
298302
#endif
299-
return TRUE;
303+
return true;
300304
}
301305
U_CDECL_END
302306
U_NAMESPACE_BEGIN
@@ -347,7 +351,7 @@ BreakIterator::unregister(URegistryKey key, UErrorCode& status)
347351
}
348352
status = U_MEMORY_ALLOCATION_ERROR;
349353
}
350-
return FALSE;
354+
return false;
351355
}
352356

353357
// -------------------------------------
@@ -409,7 +413,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
409413
if (U_FAILURE(status)) {
410414
return NULL;
411415
}
412-
char lbType[kKeyValueLenMax];
413416

414417
BreakIterator *result = NULL;
415418
switch (kind) {
@@ -429,33 +432,57 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
429432
break;
430433
case UBRK_LINE:
431434
{
435+
char lb_lw[kKeyValueLenMax];
432436
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
433-
uprv_strcpy(lbType, "line");
434-
char lbKeyValue[kKeyValueLenMax] = {0};
437+
uprv_strcpy(lb_lw, "line");
435438
UErrorCode kvStatus = U_ZERO_ERROR;
436-
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
437-
if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
438-
uprv_strcat(lbType, "_");
439-
uprv_strcat(lbType, lbKeyValue);
439+
CharString value;
440+
CharStringByteSink valueSink(&value);
441+
loc.getKeywordValue("lb", valueSink, kvStatus);
442+
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
443+
uprv_strcat(lb_lw, "_");
444+
uprv_strcat(lb_lw, value.data());
440445
}
441-
result = BreakIterator::buildInstance(loc, lbType, status);
446+
#if APPLE_ICU_CHANGES
447+
// rdar://36667210 Add ubrk_setLineWordOpts to programmatically set @lw options, add lw=keep-hangul support via keyword or function
448+
value.clear();
449+
kvStatus = U_ZERO_ERROR;
450+
loc.getKeywordValue("lw", valueSink, kvStatus);
451+
// lw=phrase is only supported in Japanese.
452+
if (U_SUCCESS(kvStatus) && value == "phrase" && uprv_strcmp(loc.getLanguage(), "ja") == 0) {
453+
uprv_strcat(lb_lw, "_");
454+
uprv_strcat(lb_lw, value.data());
455+
}
456+
#else
457+
// lw=phrase is only supported in Japanese.
458+
if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
459+
value.clear();
460+
loc.getKeywordValue("lw", valueSink, kvStatus);
461+
if (U_SUCCESS(kvStatus) && value == "phrase") {
462+
uprv_strcat(lb_lw, "_");
463+
uprv_strcat(lb_lw, value.data());
464+
}
465+
}
466+
#endif // APPLE_ICU_CHANGES
467+
result = BreakIterator::buildInstance(loc, lb_lw, status);
468+
#if APPLE_ICU_CHANGES
469+
// rdar://36667210 Add ubrk_setLineWordOpts to programmatically set @lw options, add lw=keep-hangul support via keyword or function
442470
if (U_SUCCESS(status) && result != NULL) {
443-
char lwKeyValue[kKeyValueLenMax] = {0};
444-
UErrorCode kvStatus = U_ZERO_ERROR;
445-
int32_t kLen = loc.getKeywordValue("lw", lwKeyValue, kKeyValueLenMax, kvStatus);
446471
ULineWordOptions lineWordOpts = UBRK_LINEWORD_NORMAL;
447-
if (U_SUCCESS(kvStatus) && kLen > 0) {
448-
if (uprv_strcmp(lwKeyValue,"keepall")==0 || uprv_strcmp(lwKeyValue,"keep-all")==0) {
472+
if (U_SUCCESS(kvStatus)) {
473+
if (value == "keepall" || value == "keep-all") {
449474
lineWordOpts = UBRK_LINEWORD_KEEP_ALL;
450-
} else if (uprv_strcmp(lwKeyValue,"keep-hangul")==0) {
475+
} else if (value == "keep-hangul") {
451476
lineWordOpts = UBRK_LINEWORD_KEEP_HANGUL;
452477
}
453478
}
454479
result->setLineWordOpts(lineWordOpts);
455-
((RuleBasedBreakIterator *)result)->setCategoryOverrides(loc); // <rdar://problem/51193810>
480+
// rdar://51193810 for line break, remap locale delimiters that are QU to OP/CL as appropriate
481+
((RuleBasedBreakIterator *)result)->setCategoryOverrides(loc);
456482
}
483+
#endif // APPLE_ICU_CHANGES
457484

458-
UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
485+
UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
459486
UTRACE_EXIT_STATUS(status);
460487
}
461488
break;

icuSources/common/bytesinkutil.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ U_NAMESPACE_BEGIN
2020
UBool
2121
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
2222
ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
23-
if (U_FAILURE(errorCode)) { return FALSE; }
23+
if (U_FAILURE(errorCode)) { return false; }
2424
char scratch[200];
2525
int32_t s8Length = 0;
2626
for (int32_t i = 0; i < s16Length;) {
@@ -44,25 +44,25 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
4444
}
4545
if (j > (INT32_MAX - s8Length)) {
4646
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
47-
return FALSE;
47+
return false;
4848
}
4949
sink.Append(buffer, j);
5050
s8Length += j;
5151
}
5252
if (edits != nullptr) {
5353
edits->addReplace(length, s8Length);
5454
}
55-
return TRUE;
55+
return true;
5656
}
5757

5858
UBool
5959
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
6060
const char16_t *s16, int32_t s16Length,
6161
ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
62-
if (U_FAILURE(errorCode)) { return FALSE; }
62+
if (U_FAILURE(errorCode)) { return false; }
6363
if ((limit - s) > INT32_MAX) {
6464
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
65-
return FALSE;
65+
return false;
6666
}
6767
return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
6868
}
@@ -109,16 +109,16 @@ UBool
109109
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
110110
ByteSink &sink, uint32_t options, Edits *edits,
111111
UErrorCode &errorCode) {
112-
if (U_FAILURE(errorCode)) { return FALSE; }
112+
if (U_FAILURE(errorCode)) { return false; }
113113
if ((limit - s) > INT32_MAX) {
114114
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
115-
return FALSE;
115+
return false;
116116
}
117117
int32_t length = (int32_t)(limit - s);
118118
if (length > 0) {
119119
appendNonEmptyUnchanged(s, length, sink, options, edits);
120120
}
121-
return TRUE;
121+
return true;
122122
}
123123

124124
CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {

0 commit comments

Comments
 (0)