Skip to content

Commit aa82a40

Browse files
authored
check max size of utf8proc_decompose_char buffer (#291)
* check max size of utf8proc_decompose_char buffer * cmake rule for maxdecomposition test * Update maxdecomposition.c
1 parent a720bbf commit aa82a40

File tree

5 files changed

+38
-3
lines changed

5 files changed

+38
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
/test/case
2929
/test/iscase
3030
/test/custom
31+
/test/maxdecomposition
3132
/tmp/
3233
/mingw_static/
3334
/mingw_shared/

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,15 @@ if(UTF8PROC_ENABLE_TESTING)
100100
target_link_libraries(printproperty utf8proc)
101101
add_executable(valid test/tests.h test/tests.c utf8proc.h test/valid.c)
102102
target_link_libraries(valid utf8proc)
103+
add_executable(maxdecomposition test/tests.h test/tests.c utf8proc.h test/maxdecomposition.c)
104+
target_link_libraries(maxdecomposition utf8proc)
103105
add_test(utf8proc.testcase case)
104106
add_test(utf8proc.testcustom custom)
105107
add_test(utf8proc.testiterate iterate)
106108
add_test(utf8proc.testmisc misc)
107109
add_test(utf8proc.testprintproperty printproperty)
108110
add_test(utf8proc.testvalid valid)
111+
add_test(utf8proc.testmaxdecomposition maxdecomposition)
109112

110113
if (NOT WIN32)
111114
# no wcwidth function on Windows

Makefile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ clean:
5959
ifneq ($(OS),Darwin)
6060
rm -f libutf8proc.so.$(MAJOR)
6161
endif
62-
rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom test/misc test/iscase
62+
rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom test/misc test/iscase test/maxdecomposition
6363
rm -rf MANIFEST.new tmp
6464
$(MAKE) -C bench clean
6565
$(MAKE) -C data clean
@@ -171,6 +171,9 @@ test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h
171171
test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h
172172
$(CC) $(UCFLAGS) $(LDFLAGS) -DUNICODE_VERSION='"'`$(PERL) -ne "/^UNICODE_VERSION=/ and print $$';" data/Makefile`'"' test/misc.c test/tests.o utf8proc.o -o $@
173173

174+
test/maxdecomposition: test/maxdecomposition.c test/tests.o utf8proc.o utf8proc.h test/tests.h
175+
$(CC) $(UCFLAGS) $(LDFLAGS) -DUNICODE_VERSION='"'`$(PERL) -ne "/^UNICODE_VERSION=/ and print $$';" data/Makefile`'"' test/maxdecomposition.c test/tests.o utf8proc.o -o $@
176+
174177
# make release tarball from master branch
175178
dist:
176179
git archive master --prefix=utf8proc-$(VERSION)/ -o utf8proc-$(VERSION).tar.gz
@@ -186,7 +189,7 @@ distcheck: dist
186189
make -C utf8proc-$(VERSION) check
187190
rm -rf utf8proc-$(VERSION)
188191

189-
check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercase.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/iscase test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
192+
check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercase.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/iscase test/custom test/charwidth test/misc test/maxdecomposition test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
190193
$(MAKE) -C bench
191194
test/normtest data/NormalizationTest.txt
192195
test/graphemetest data/GraphemeBreakTest.txt
@@ -197,3 +200,4 @@ check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercas
197200
test/case
198201
test/iscase data/Lowercase.txt data/Uppercase.txt
199202
test/custom
203+
test/maxdecomposition

test/maxdecomposition.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#include "tests.h"
2+
3+
/* Check the maximum decomposed size returned by utf8proc_decompose_char with UTF8PROC_DECOMPOSE,
4+
in order to give a hint in the documentation. The hint will need to be updated if this changes. */
5+
6+
int main(void) /* Test driver: finds the largest value utf8proc_decompose_char returns under UTF8PROC_DECOMPOSE and compares it with the expected maximum. */
7+
{
8+
utf8proc_int32_t dst[128]; /* scratch output buffer; its capacity (128) is the bufsize passed to each call below */
9+
utf8proc_ssize_t maxsize = 0, expected_maxsize = 4; /* running maximum, and the value the test requires it to equal */
10+
int success;
11+
12+
for (utf8proc_int32_t c = 0; c <= 0x110000; ++c) { /* visits every value from 0 through 0x110000 inclusive */
13+
utf8proc_ssize_t sz = utf8proc_decompose_char(c, dst, 128, UTF8PROC_DECOMPOSE, NULL);
14+
maxsize = sz > maxsize ? sz : maxsize; /* keep the larger value; a negative return can never replace maxsize, which starts at 0 */
15+
}
16+
17+
success = expected_maxsize == maxsize; /* passes only on an exact match, so a smaller maximum fails as well as a larger one */
18+
fprintf(success ? stdout : stderr, /* report goes to stdout on success, stderr on failure */
19+
"%s: maximum decomposed size = %d chars\n",
20+
success ? "SUCCEEDED" : "FAILED", (int) maxsize);
21+
return !success; /* exit status 0 when the check passes, 1 when it fails */
22+
}

utf8proc.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
517517
* @param dst the destination buffer.
518518
* @param bufsize the size of the destination buffer.
519519
* @param options one or more of the following flags:
520-
* - @ref UTF8PROC_REJECTNA - return an error `codepoint` is unassigned
520+
* - @ref UTF8PROC_REJECTNA - return an error if `codepoint` is unassigned
521521
* - @ref UTF8PROC_IGNORE - strip "default ignorable" codepoints
522522
* - @ref UTF8PROC_CASEFOLD - apply Unicode casefolding
523523
* - @ref UTF8PROC_COMPAT - replace certain codepoints with their
@@ -532,6 +532,11 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
532532
* option is used. If the string is being processed in order, this can be initialized to 0 for
533533
* the beginning of the string, and is thereafter updated automatically. Otherwise, this parameter is ignored.
534534
*
535+
* In the current version of utf8proc, the maximum destination buffer size required with the @ref UTF8PROC_DECOMPOSE
536+
* option is 4 elements (or double that with @ref UTF8PROC_CHARBOUND), so this is a good default size.
537+
* However, this may increase in future Unicode versions, so you should always check the return value
538+
* as described below.
539+
*
535540
* @return
536541
* In case of success, the number of codepoints written is returned; in case
537542
* of an error, a negative error code is returned (utf8proc_errmsg()).

0 commit comments

Comments
 (0)