Skip to content

Commit abe589e

Browse files
committed
feat: add support for utf8
1 parent 04a412d commit abe589e

File tree

1 file changed

+69
-1
lines changed

1 file changed

+69
-1
lines changed

src/langlib/string/kllib_string.c

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@ static KlException kllib_string_sub(KlState* state);
1313
static KlException kllib_string_find(KlState* state);
1414
static KlException kllib_string_join(KlState* state);
1515

16+
static KlException kllib_string_utf8idx(KlState* state);
17+
static KlException kllib_string_utf8len(KlState* state);
18+
19+
/*
 * Byte length of the UTF-8 sequence announced by the byte at `str`.
 *
 * Only the first byte is inspected; nothing is validated and no following
 * byte is read. Lead bytes below 0xC0 (ASCII, and also stray continuation
 * bytes) count as one byte, 0xC0..0xDF as two, 0xE0..0xEF as three, and
 * everything from 0xF0 upwards as four.
 */
static inline size_t kllib_string_utf8charlen(const unsigned char* str) {
  const unsigned lead = str[0];
  /* One byte is always consumed; each threshold the lead byte reaches adds one more. */
  return (size_t)(1 + (lead >= 0xC0) + (lead >= 0xE0) + (lead >= 0xF0));
}
26+
1627
static inline KlInt kllib_normalise_stringidx(KlInt idx, size_t strlength) {
1728
while (idx < 0)
1829
idx = strlength + idx;
@@ -38,6 +49,14 @@ KlException KLCONFIG_LIBRARY_STRING_ENTRYFUNCNAME(KlState* state) {
3849
klapi_setcfunc(state, -1, kllib_string_sub);
3950
KLAPI_PROTECT(klapi_class_newshared_method(state, strclass, klapi_getstring(state, -2)));
4051

52+
KLAPI_PROTECT(klapi_setstring(state, -2, "utf8len"));
53+
klapi_setcfunc(state, -1, kllib_string_utf8len);
54+
KLAPI_PROTECT(klapi_class_newshared_method(state, strclass, klapi_getstring(state, -2)));
55+
56+
KLAPI_PROTECT(klapi_setstring(state, -2, "utf8idx"));
57+
klapi_setcfunc(state, -1, kllib_string_utf8idx);
58+
KLAPI_PROTECT(klapi_class_newshared_method(state, strclass, klapi_getstring(state, -2)));
59+
4160
return klapi_return(state, 0);
4261
}
4362

@@ -58,7 +77,7 @@ static KlException kllib_string_sub(KlState* state) {
5877
end = kllib_normalise_stringidx(klapi_getint(state, -1), strlength);
5978
}
6079
if (kl_unlikely(end < begin))
61-
return klapi_throw_internal(state, KL_E_INVLD, "invalid range: (%zd, %zd) for string: %s", begin, end, klstring_content(str));
80+
return klapi_throw_internal(state, KL_E_RANGE, "invalid range: (%zd, %zd) for string: %s", begin, end, klstring_content(str));
6281
KlString* res = klstrpool_new_string_buf(klstate_strpool(state), klstring_content(str) + begin, end - begin);
6382
if (kl_unlikely(!res))
6483
return klapi_throw_internal(state, KL_E_OOM, "out of memory while creating string");
@@ -152,3 +171,52 @@ static KlException kllib_string_join_raw(KlState* state, size_t nval, KlValue* v
152171
}
153172
}
154173
}
174+
175+
/*
 * utf8idx(str, count[, begin]): byte index reached after skipping `count`
 * UTF-8 characters of `str`, starting the scan at byte offset `begin`
 * (0 when the third argument is absent; otherwise passed through
 * kllib_normalise_stringidx together with the string length).
 *
 * Arguments: base slot 0 must be a string and base slot 1 an integer; the
 * optional third argument is read from index -1 and must be an integer.
 *
 * Throws:
 *   KL_E_ARGNO  unless exactly two or three arguments were passed
 *   KL_E_TYPE   for a wrongly typed argument, or for a negative count
 *   KL_E_RANGE  when fewer than `count` characters start before the end of
 *               the string, i.e. the resulting index would not be a valid
 *               byte position inside the string
 *
 * On success the index is stored with klapi_setint at index -1 and the
 * function returns through klapi_return(state, 1).
 *
 * Character lengths come from kllib_string_utf8charlen, which only looks at
 * lead bytes; the string is not validated as UTF-8.
 */
static KlException kllib_string_utf8idx(KlState* state) {
  if (kl_unlikely(klapi_narg(state) != 2 && klapi_narg(state) != 3))
    return klapi_throw_internal(state, KL_E_ARGNO, "expected two or three arguments");
  if (kl_unlikely(!klapi_checktypeb(state, 0, KL_STRING)))
    return klapi_throw_internal(state, KL_E_TYPE, "expected string, got %s", klstring_content(klapi_typename(state, klapi_accessb(state, 0))));
  if (kl_unlikely(!klapi_checktypeb(state, 1, KL_INT)))
    return klapi_throw_internal(state, KL_E_TYPE, "expected integer, got %s", klstring_content(klapi_typename(state, klapi_accessb(state, 1))));
  KlString* str = klapi_getstringb(state, 0);
  KlInt count = klapi_getintb(state, 1);
  KlInt begin = 0;
  if (klapi_narg(state) == 3) {
    if (kl_unlikely(!klapi_checktype(state, -1, KL_INT)))
      return klapi_throw_internal(state, KL_E_TYPE, "expected integer, got %s", klstring_content(klapi_typename(state, klapi_access(state, -1))));
    begin = kllib_normalise_stringidx(klapi_getint(state, -1), klstring_length(str));
  }
  /* Zero is accepted (the scan then returns `begin` unchanged); only negative
   * counts are rejected. Error kind and message are kept as-is for callers. */
  if (kl_unlikely(count < 0))
    return klapi_throw_internal(state, KL_E_TYPE, "expected positive index");

  const unsigned char* text = (const unsigned char*)klstring_content(str);
  size_t strlength = klstring_length(str);
  size_t idx = begin;

  /* Never inspect a lead byte at or beyond the end of the string: if the scan
   * has to advance but already starts there, no further character exists.
   * Inside the loop every later read is covered by the post-advance check. */
  if (kl_unlikely(count > 0 && idx >= strlength))
    return klapi_throw_internal(state, KL_E_RANGE, "there are not that many utf8 characters");

  while (count-- > 0) {
    idx += kllib_string_utf8charlen(text + idx);
    if (kl_unlikely(idx >= strlength))
      return klapi_throw_internal(state, KL_E_RANGE, "there are not that many utf8 characters");
  }

  klapi_setint(state, -1, idx);
  return klapi_return(state, 1);
}
205+
206+
/*
 * utf8len(str): number of UTF-8 characters in `str`.
 *
 * Expects exactly one argument, which must be a string; otherwise throws
 * KL_E_ARGNO or KL_E_TYPE respectively. The string is walked from byte 0,
 * stepping by the length kllib_string_utf8charlen derives from each lead
 * byte, and every step counts as one character. No UTF-8 validation is
 * performed. The count is stored with klapi_setint at index -1 and the
 * function returns through klapi_return(state, 1).
 */
static KlException kllib_string_utf8len(KlState* state) {
  if (kl_unlikely(klapi_narg(state) != 1)) {
    return klapi_throw_internal(state, KL_E_ARGNO, "expected exactly one argument");
  }
  if (kl_unlikely(!klapi_checktype(state, -1, KL_STRING))) {
    return klapi_throw_internal(state, KL_E_TYPE, "expected string, got %s",
                                klstring_content(klapi_typename(state, klapi_accessb(state, 0))));
  }

  KlString* subject = klapi_getstring(state, -1);
  const unsigned char* bytes = (const unsigned char*)klstring_content(subject);
  const size_t nbytes = klstring_length(subject);

  /* Each iteration consumes one character's worth of bytes. */
  KlInt nchars = 0;
  for (size_t pos = 0; pos < nbytes; pos += kllib_string_utf8charlen(bytes + pos)) {
    ++nchars;
  }

  klapi_setint(state, -1, nchars);
  return klapi_return(state, 1);
}

0 commit comments

Comments
 (0)