Skip to content

Commit dede492

Browse files
committed
Merge remote-tracking branch 'upstream/master' into add-cmake-config-file-package
2 parents 8ed21d9 + a1b99da commit dede492

File tree

19 files changed

+15125
-14932
lines changed

19 files changed

+15125
-14932
lines changed

.github/dependabot.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: "github-actions"
4+
directory: "/"
5+
schedule:
6+
interval: "daily"
7+
commit-message:
8+
prefix: "ci"

.github/workflows/ci-fuzz.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ jobs:
1616
fuzz-seconds: 600
1717
dry-run: false
1818
- name: Upload Crash
19-
uses: actions/upload-artifact@v1
19+
uses: actions/upload-artifact@v4
2020
if: failure()
2121
with:
2222
name: artifacts
23-
path: ./out/artifacts
23+
path: ./out/artifacts

.github/workflows/cmake.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
runs-on: ${{ matrix.os }}
1818
name: ${{ matrix.os }} - shared=${{ matrix.shared }}
1919
steps:
20-
- uses: actions/checkout@v2
20+
- uses: actions/checkout@v4
2121
- name: Build
2222
run: |
2323
mkdir build
@@ -27,7 +27,7 @@ jobs:
2727
run: ctest --test-dir build -V
2828
- name: Upload shared lib
2929
if: matrix.shared == 'ON'
30-
uses: actions/upload-artifact@v2
30+
uses: actions/upload-artifact@v4
3131
with:
3232
name: ${{ matrix.os }}
3333
path: |
@@ -60,7 +60,7 @@ jobs:
6060
run:
6161
shell: msys2 {0}
6262
steps:
63-
- uses: actions/checkout@v2
63+
- uses: actions/checkout@v4
6464
- uses: msys2/setup-msys2@v2
6565
with:
6666
install: gcc make mingw-w64-x86_64-cmake
@@ -73,7 +73,7 @@ jobs:
7373
run: ctest --test-dir build -V
7474
- name: Upload shared lib
7575
if: matrix.shared == 'ON'
76-
uses: actions/upload-artifact@v2
76+
uses: actions/upload-artifact@v4
7777
with:
7878
name: windows-mingw64
7979
path: build/libutf8proc.*

.github/workflows/make.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ jobs:
1616
runs-on: ${{ matrix.os }}
1717
name: ${{ matrix.os }}
1818
steps:
19-
- uses: actions/checkout@v2
19+
- uses: actions/checkout@v4
2020
# TODO: update makefile to check MANIFEST
21-
# - name: Install dependencies (MacOS)
22-
# if: matrix.config.os == 'macos-latest'
23-
# run: brew install ruby findutils
21+
- name: Install dependencies (MacOS)
22+
if: runner.os == 'macOS'
23+
run: brew install julia
2424

2525
- name: Check MANIFEST
2626
if: matrix.config.os == 'ubuntu-latest'
@@ -35,7 +35,7 @@ jobs:
3535
- name: Make lib
3636
run: make
3737
- name: Upload shared lib
38-
uses: actions/upload-artifact@v2
38+
uses: actions/upload-artifact@v4
3939
with:
4040
name: make-${{ matrix.os }}
4141
path: libutf8proc.*

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,7 @@
3737
/build/
3838
NEWS-update.jl
3939
libutf8proc.pc
40+
41+
# clangd
42+
/.cache/
43+
/compile_commands.json

CMakeLists.txt

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
1-
cmake_minimum_required (VERSION 3.5)
1+
cmake_minimum_required (VERSION 3.10)
22

33
include (utils.cmake)
44

55
disallow_intree_builds()
66

7-
if (POLICY CMP0048)
8-
cmake_policy (SET CMP0048 NEW)
9-
endif ()
10-
project (utf8proc VERSION 2.9.0 LANGUAGES C)
7+
# API version - be sure to update utf8proc.h and Makefile, too!
8+
project (utf8proc VERSION 2.10.0 LANGUAGES C)
119

1210
# This is the ABI version number, which may differ from the
1311
# API version number (defined in utf8proc.h and above).
1412
# Be sure to also update these in Makefile and MANIFEST!
1513
set(SO_MAJOR 3)
16-
set(SO_MINOR 0)
14+
set(SO_MINOR 1)
1715
set(SO_PATCH 0)
1816

1917
option(UTF8PROC_INSTALL "Enable installation of utf8proc" On)
@@ -87,7 +85,7 @@ endif()
8785
if(UTF8PROC_ENABLE_TESTING)
8886
enable_testing()
8987
file(MAKE_DIRECTORY data)
90-
set(UNICODE_VERSION 15.1.0)
88+
set(UNICODE_VERSION 16.0.0)
9189
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt ${CMAKE_BINARY_DIR}/data/NormalizationTest.txt SHOW_PROGRESS)
9290
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt ${CMAKE_BINARY_DIR}/data/GraphemeBreakTest.txt SHOW_PROGRESS)
9391
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)

MANIFEST

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ include/
22
include/utf8proc.h
33
lib/
44
lib/libutf8proc.a
5-
lib/libutf8proc.so -> libutf8proc.so.3.0.0
6-
lib/libutf8proc.so.2 -> libutf8proc.so.3.0.0
7-
lib/libutf8proc.so.3.0.0
5+
lib/libutf8proc.so -> libutf8proc.so.3.1.0
6+
lib/libutf8proc.so.2 -> libutf8proc.so.3.1.0
7+
lib/libutf8proc.so.3.1.0
88
lib/pkgconfig/
99
lib/pkgconfig/libutf8proc.pc

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ SOFLAG = -Wl,-soname
2323
# The API version number is defined in utf8proc.h.
2424
# Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt!
2525
MAJOR=3
26-
MINOR=0
26+
MINOR=1
2727
PATCH=0
2828

2929
# api version (also in utf8proc.h and CMakeLists.txt)
30-
VERSION=2.9.0
30+
VERSION=2.10.0
3131

3232
OS := $(shell uname)
3333
ifeq ($(OS),Darwin) # MacOS X

NEWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# utf8proc release history #
22

3+
## Version 2.10.0 ##
4+
5+
2024-12-31
6+
7+
- Unicode 16 support ([#277]).
8+
- New `utf8proc_charwidth_ambiguous` function to return whether a character has
9+
East Asian width class A (Ambiguous) ([#270]).
10+
311
## Version 2.9.0 ##
412

513
2023-10-20
@@ -443,3 +451,5 @@ Release of version 1.0.1
443451
[#233]: https://github.com/JuliaStrings/utf8proc/issues/233
444452
[#247]: https://github.com/JuliaStrings/utf8proc/issues/247
445453
[#253]: https://github.com/JuliaStrings/utf8proc/issues/253
454+
[#270]: https://github.com/JuliaStrings/utf8proc/issues/270
455+
[#277]: https://github.com/JuliaStrings/utf8proc/issues/277

README.md

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ developers became involved because they wanted to add Unicode 7 support and othe
2020

2121
(The original utf8proc package also includes Ruby and PostgreSQL plug-ins.
2222
We removed those from utf8proc in order to focus exclusively on the C
23-
library for the time being, but plan to add them back in or release them as separate packages.)
23+
library.)
2424

2525
The utf8proc package is licensed under the
2626
free/open-source [MIT "expat"
@@ -69,7 +69,7 @@ The C library is found in this directory after successful compilation
6969
and is named `libutf8proc.a` (for the static library) and
7070
`libutf8proc.so` (for the dynamic library).
7171

72-
The Unicode version supported is 15.1.0.
72+
The Unicode version supported is 16.0.0.
7373

7474
For Unicode normalizations, the following options are used:
7575

@@ -96,3 +96,50 @@ the [utf8proc issues page on Github](https://github.com/JuliaLang/utf8proc/issue
9696
## See also
9797

9898
An independent Lua translation of this library, [lua-mojibake](https://github.com/differentprogramming/lua-mojibake), is also available.
99+
100+
## Examples
101+
102+
### Convert codepoint to string
103+
```c
104+
// Convert codepoint `a` to utf8 string `str`
105+
utf8proc_int32_t a = 223;
106+
utf8proc_uint8_t str[16] = { 0 };
107+
utf8proc_encode_char(a, str);
108+
printf("%s\n", str);
109+
// ß
110+
```
111+
112+
### Convert string to codepoint
113+
```c
114+
// Decode the first codepoint of the UTF-8 string `str` into `a`
115+
utf8proc_uint8_t str[] = "ß";
116+
utf8proc_int32_t a;
117+
utf8proc_iterate(str, -1, &a);
118+
printf("%d\n", a);
119+
// 223
120+
```
121+
122+
### Casefold
123+
124+
```c
125+
// Convert "ß" (U+00DF) to its casefold variant "ss"
126+
utf8proc_uint8_t str[] = "ß";
127+
utf8proc_uint8_t *fold_str;
128+
utf8proc_map(str, 0, &fold_str, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
129+
printf("%s\n", fold_str);
130+
// ss
131+
free(fold_str);
132+
```
133+
134+
### Normalization Form C/D (NFC/NFD)
135+
```c
136+
// Decompose "\u00e4\u00f6\u00fc" = "äöü" into "a\u0308o\u0308u\u0308" (= "äöü" via combining char U+0308)
137+
utf8proc_uint8_t input[] = {0xc3, 0xa4, 0xc3, 0xb6, 0xc3, 0xbc, 0x00}; // "\u00e4\u00f6\u00fc" = "äöü" in UTF-8 (NUL-terminated, as utf8proc_NFD requires)
138+
utf8proc_uint8_t *nfd= utf8proc_NFD(input); // = {0x61, 0xcc, 0x88, 0x6f, 0xcc, 0x88, 0x75, 0xcc, 0x88}
139+
140+
// Compose "a\u0308o\u0308u\u0308" into "\u00e4\u00f6\u00fc" (= "äöü" via precomposed characters)
141+
utf8proc_uint8_t *nfc= utf8proc_NFC(nfd);
142+
143+
free(nfd);
144+
free(nfc);
145+
```

0 commit comments

Comments
 (0)