@@ -13,6 +13,17 @@ static KlException kllib_string_sub(KlState* state);
1313static KlException kllib_string_find (KlState * state );
1414static KlException kllib_string_join (KlState * state );
1515
16+ static KlException kllib_string_utf8idx (KlState * state );
17+ static KlException kllib_string_utf8len (KlState * state );
18+
/*
 * Return the number of bytes occupied by the UTF-8 sequence that starts at
 * 'str'.
 *
 * The lead byte announces the expected length (1 to 4 bytes). The bytes that
 * follow are only counted while they really are continuation bytes
 * (bit pattern 10xxxxxx), so a truncated or malformed sequence never swallows
 * the bytes after it. Stray continuation bytes (0x80..0xBF) and invalid lead
 * bytes (0xF8..0xFF) are reported as a single byte so that callers always
 * make progress.
 *
 * 'str' must point to at least one readable byte. Bytes after the first are
 * read only while every previous one was a continuation byte, so the scan
 * stops at the first non-continuation byte (for example a terminating NUL).
 */
static inline size_t kllib_string_utf8charlen(const unsigned char* str) {
  const unsigned lead = *str;
  /* ASCII, stray continuation byte, or a byte that can never start a sequence. */
  if (lead < 0xC0 || lead >= 0xF8)
    return 1;

  size_t expected;
  if (lead < 0xE0)
    expected = 2;
  else if (lead < 0xF0)
    expected = 3;
  else
    expected = 4;

  /* Accept only as many bytes as are genuine continuation bytes. */
  size_t len = 1;
  while (len < expected && (str[len] & 0xC0) == 0x80)
    ++len;
  return len;
}
26+
1627static inline KlInt kllib_normalise_stringidx (KlInt idx , size_t strlength ) {
1728 while (idx < 0 )
1829 idx = strlength + idx ;
@@ -38,6 +49,14 @@ KlException KLCONFIG_LIBRARY_STRING_ENTRYFUNCNAME(KlState* state) {
3849 klapi_setcfunc (state , -1 , kllib_string_sub );
3950 KLAPI_PROTECT (klapi_class_newshared_method (state , strclass , klapi_getstring (state , -2 )));
4051
52+ KLAPI_PROTECT (klapi_setstring (state , -2 , "utf8len" ));
53+ klapi_setcfunc (state , -1 , kllib_string_utf8len );
54+ KLAPI_PROTECT (klapi_class_newshared_method (state , strclass , klapi_getstring (state , -2 )));
55+
56+ KLAPI_PROTECT (klapi_setstring (state , -2 , "utf8idx" ));
57+ klapi_setcfunc (state , -1 , kllib_string_utf8idx );
58+ KLAPI_PROTECT (klapi_class_newshared_method (state , strclass , klapi_getstring (state , -2 )));
59+
4160 return klapi_return (state , 0 );
4261}
4362
@@ -58,7 +77,7 @@ static KlException kllib_string_sub(KlState* state) {
5877 end = kllib_normalise_stringidx (klapi_getint (state , -1 ), strlength );
5978 }
6079 if (kl_unlikely (end < begin ))
61- return klapi_throw_internal (state , KL_E_INVLD , "invalid range: (%zd, %zd) for string: %s" , begin , end , klstring_content (str ));
80+ return klapi_throw_internal (state , KL_E_RANGE , "invalid range: (%zd, %zd) for string: %s" , begin , end , klstring_content (str ));
6281 KlString * res = klstrpool_new_string_buf (klstate_strpool (state ), klstring_content (str ) + begin , end - begin );
6382 if (kl_unlikely (!res ))
6483 return klapi_throw_internal (state , KL_E_OOM , "out of memory while creating string" );
@@ -152,3 +171,52 @@ static KlException kllib_string_join_raw(KlState* state, size_t nval, KlValue* v
152171 }
153172 }
154173}
174+
175+ static KlException kllib_string_utf8idx (KlState * state ) {
176+ if (kl_unlikely (klapi_narg (state ) != 2 && klapi_narg (state ) != 3 ))
177+ return klapi_throw_internal (state , KL_E_ARGNO , "expected two or three arguments" );
178+ if (kl_unlikely (!klapi_checktypeb (state , 0 , KL_STRING )))
179+ return klapi_throw_internal (state , KL_E_TYPE , "expected string, got %s" , klstring_content (klapi_typename (state , klapi_accessb (state , 0 ))));
180+ if (kl_unlikely (!klapi_checktypeb (state , 1 , KL_INT )))
181+ return klapi_throw_internal (state , KL_E_TYPE , "expected integer, got %s" , klstring_content (klapi_typename (state , klapi_accessb (state , 1 ))));
182+ KlString * str = klapi_getstringb (state , 0 );
183+ KlInt count = klapi_getintb (state , 1 );
184+ KlInt begin = 0 ;
185+ if (klapi_narg (state ) == 3 ) {
186+ if (kl_unlikely (!klapi_checktype (state , -1 , KL_INT )))
187+ return klapi_throw_internal (state , KL_E_TYPE , "expected integer, got %s" , klstring_content (klapi_typename (state , klapi_access (state , -1 ))));
188+ begin = kllib_normalise_stringidx (klapi_getint (state , -1 ), klstring_length (str ));
189+ }
190+ if (kl_unlikely (count < 0 ))
191+ return klapi_throw_internal (state , KL_E_TYPE , "expected positive index" );
192+
193+ const unsigned char * text = (unsigned char * )klstring_content (str );
194+ size_t strlength = klstring_length (str );
195+ size_t idx = begin ;
196+ while (count -- > 0 ) {
197+ idx += kllib_string_utf8charlen (text + idx );
198+ if (kl_unlikely (idx >= strlength ))
199+ return klapi_throw_internal (state , KL_E_RANGE , "there are not that many utf8 characters" );
200+ }
201+
202+ klapi_setint (state , -1 , idx );
203+ return klapi_return (state , 1 );
204+ }
205+
206+ static KlException kllib_string_utf8len (KlState * state ) {
207+ if (kl_unlikely (klapi_narg (state ) != 1 ))
208+ return klapi_throw_internal (state , KL_E_ARGNO , "expected exactly one argument" );
209+ if (kl_unlikely (!klapi_checktype (state , -1 , KL_STRING )))
210+ return klapi_throw_internal (state , KL_E_TYPE , "expected string, got %s" , klstring_content (klapi_typename (state , klapi_accessb (state , 0 ))));
211+ KlString * str = klapi_getstring (state , -1 );
212+ const unsigned char * text = (unsigned char * )klstring_content (str );
213+ size_t strlength = klstring_length (str );
214+ size_t idx = 0 ;
215+ KlInt count = 0 ;
216+ while (idx < strlength ) {
217+ idx += kllib_string_utf8charlen (text + idx );
218+ ++ count ;
219+ }
220+ klapi_setint (state , -1 , count );
221+ return klapi_return (state , 1 );
222+ }
0 commit comments