Skip to content

Commit cfcb4c7

Browse files
committed
Add long string support.
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg [email protected]
1 parent efdf91a commit cfcb4c7

File tree

4 files changed

+162
-55
lines changed

4 files changed

+162
-55
lines changed

jerry-core/config.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,6 @@
8585

8686
#define CONFIG_ECMA_REFERENCE_COUNTER_LIMIT ((1u << CONFIG_ECMA_REFERENCE_COUNTER_WIDTH) - 1u)
8787

88-
/**
89-
* Maximum length of strings' concatenation
90-
*/
91-
#define CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH (1048576)
92-
9388
/**
9489
* Use 32-bit/64-bit float for ecma-numbers
9590
*/

jerry-core/ecma/base/ecma-globals.h

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,10 @@ typedef struct
809809
*/
810810
typedef enum
811811
{
812-
ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string */
812+
ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string
813+
* maximum size is 2^16 - 1 bytes (UINT16_MAX). */
814+
ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string
815+
* maximum size is 2^32. */
813816
ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represented Number
814817
stored locally in the string's descriptor */
815818
ECMA_STRING_CONTAINER_MAGIC_STRING, /**< the ecma-string is equal to one of ECMA magic strings */
@@ -872,31 +875,32 @@ typedef struct ecma_string_t
872875
union
873876
{
874877
/**
875-
* Actual data of an utf-8 string type
876-
*/
878+
* Actual data of an utf-8 string type
879+
*/
877880
struct
878881
{
879-
uint16_t size; /**< Size of this utf-8 string in bytes */
880-
uint16_t length; /**< Length of this utf-8 string in characters */
882+
uint16_t size; /**< size of this utf-8 string in bytes */
883+
uint16_t length; /**< length of this utf-8 string in characters */
881884
} utf8_string;
882885

883-
/** UInt32-represented number placed locally in the descriptor */
884-
uint32_t uint32_number;
885-
886-
/** Identifier of magic string */
887-
lit_magic_string_id_t magic_string_id;
888-
889-
/** Identifier of external magic string */
890-
lit_magic_string_ex_id_t magic_string_ex_id;
891-
892-
/** Literal number */
893-
ecma_value_t lit_number;
894-
895-
/** For zeroing and comparison in some cases */
896-
uint32_t common_field;
886+
lit_utf8_size_t long_utf8_string_size; /**< size of this long utf-8 string in bytes */
887+
uint32_t uint32_number; /**< uint32-represented number placed locally in the descriptor */
888+
lit_magic_string_id_t magic_string_id; /**< identifier of a magic string */
889+
lit_magic_string_ex_id_t magic_string_ex_id; /**< identifier of an external magic string */
890+
ecma_value_t lit_number; /**< literal number (note: not a regular string type) */
891+
uint32_t common_field; /**< for zeroing and comparison in some cases */
897892
} u;
898893
} ecma_string_t;
899894

895+
/**
896+
* Long ECMA string-value descriptor
897+
*/
898+
typedef struct
899+
{
900+
ecma_string_t header; /**< string header */
901+
lit_utf8_size_t long_utf8_string_length; /**< length of this long utf-8 string in characters */
902+
} ecma_long_string_t;
903+
900904
/**
901905
* Compiled byte code data.
902906
*/

jerry-core/ecma/base/ecma-helpers-string.c

Lines changed: 138 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,6 @@
3131
* @{
3232
*/
3333

34-
/**
35-
* Maximum length of strings' concatenation
36-
*/
37-
#define ECMA_STRING_MAX_CONCATENATION_LENGTH (CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH)
38-
39-
/**
40-
* The length should be representable with int32_t.
41-
*/
42-
JERRY_STATIC_ASSERT (ECMA_STRING_MAX_CONCATENATION_LENGTH <= INT32_MAX,
43-
ECMA_STRING_MAX_CONCATENATION_LENGTH_should_be_representable_with_int32_t);
44-
4534
/**
4635
* The ecma string ref counter should start after the container field.
4736
*/
@@ -124,17 +113,37 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
124113
return ecma_get_magic_string_ex (magic_string_ex_id);
125114
}
126115

127-
JERRY_ASSERT (string_size > 0 && string_size <= UINT16_MAX);
116+
JERRY_ASSERT (string_size > 0);
128117

129-
ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size);
118+
ecma_string_t *string_desc_p;
119+
lit_utf8_byte_t *data_p;
130120

131-
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
132-
string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size);
133-
string_desc_p->u.common_field = 0;
134-
string_desc_p->u.utf8_string.size = (uint16_t) string_size;
135-
string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size);
121+
if (likely (string_size <= UINT16_MAX))
122+
{
123+
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size);
136124

137-
lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
125+
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
126+
string_desc_p->u.common_field = 0;
127+
string_desc_p->u.utf8_string.size = (uint16_t) string_size;
128+
string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size);
129+
130+
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
131+
}
132+
else
133+
{
134+
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + string_size);
135+
136+
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
137+
string_desc_p->u.common_field = 0;
138+
string_desc_p->u.long_utf8_string_size = string_size;
139+
140+
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
141+
long_string_desc_p->long_utf8_string_length = lit_utf8_string_length (string_p, string_size);
142+
143+
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
144+
}
145+
146+
string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size);
138147
memcpy (data_p, string_p, string_size);
139148
return string_desc_p;
140149
} /* ecma_new_ecma_string_from_utf8 */
@@ -348,6 +357,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
348357
utf8_string1_length = string1_p->u.utf8_string.length;
349358
break;
350359
}
360+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
361+
{
362+
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string1_p;
363+
364+
utf8_string1_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
365+
utf8_string1_size = string1_p->u.long_utf8_string_size;
366+
utf8_string1_length = long_string_desc_p->long_utf8_string_length;
367+
break;
368+
}
351369
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
352370
{
353371
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
@@ -384,6 +402,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
384402
utf8_string2_length = string2_p->u.utf8_string.length;
385403
break;
386404
}
405+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
406+
{
407+
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string2_p;
408+
409+
utf8_string2_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
410+
utf8_string2_size = string2_p->u.long_utf8_string_size;
411+
utf8_string2_length = long_string_desc_p->long_utf8_string_length;
412+
break;
413+
}
387414
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
388415
{
389416
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
@@ -418,20 +445,44 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
418445

419446
lit_utf8_size_t new_size = utf8_string1_size + utf8_string2_size;
420447

421-
JERRY_ASSERT (new_size <= UINT16_MAX);
448+
/* The size addition wrapped around: a string this large cannot be allocated. */
449+
if (new_size < (utf8_string1_size | utf8_string2_size))
450+
{
451+
jerry_fatal (ERR_OUT_OF_MEMORY);
452+
}
422453

423-
ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size);
454+
ecma_string_t *string_desc_p;
455+
lit_utf8_byte_t *data_p;
456+
457+
if (likely (new_size <= UINT16_MAX))
458+
{
459+
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size);
460+
461+
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
462+
string_desc_p->u.common_field = 0;
463+
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
464+
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
465+
466+
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
467+
}
468+
else
469+
{
470+
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + new_size);
471+
472+
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
473+
string_desc_p->u.common_field = 0;
474+
string_desc_p->u.long_utf8_string_size = new_size;
475+
476+
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
477+
long_string_desc_p->long_utf8_string_length = utf8_string1_length + utf8_string2_length;
478+
479+
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
480+
}
424481

425-
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
426482
string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, utf8_string2_p, utf8_string2_size);
427-
string_desc_p->u.common_field = 0;
428-
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
429-
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
430483

431-
lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
432484
memcpy (data_p, utf8_string1_p, utf8_string1_size);
433485
memcpy (data_p + utf8_string1_size, utf8_string2_p, utf8_string2_size);
434-
435486
return string_desc_p;
436487
} /* ecma_concat_ecma_strings */
437488

@@ -480,6 +531,13 @@ ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */
480531
jmem_heap_free_block (string_p, string_p->u.utf8_string.size + sizeof (ecma_string_t));
481532
return;
482533
}
534+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
535+
{
536+
JERRY_ASSERT (string_p->u.long_utf8_string_size > UINT16_MAX);
537+
538+
jmem_heap_free_block (string_p, string_p->u.long_utf8_string_size + sizeof (ecma_long_string_t));
539+
return;
540+
}
483541
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
484542
case ECMA_STRING_CONTAINER_MAGIC_STRING:
485543
case ECMA_STRING_CONTAINER_MAGIC_STRING_EX:
@@ -518,6 +576,7 @@ ecma_string_to_number (const ecma_string_t *str_p) /**< ecma-string */
518576
}
519577

520578
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
579+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
521580
case ECMA_STRING_CONTAINER_MAGIC_STRING:
522581
case ECMA_STRING_CONTAINER_MAGIC_STRING_EX:
523582
{
@@ -562,7 +621,8 @@ ecma_string_get_array_index (const ecma_string_t *str_p, /**< ecma-string */
562621
*out_index_p = index;
563622
return index != UINT32_MAX;
564623
}
565-
else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING)
624+
else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING
625+
|| type == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
566626
{
567627
return false;
568628
}
@@ -660,6 +720,12 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
660720
memcpy (buffer_p, string_desc_p + 1, size);
661721
break;
662722
}
723+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
724+
{
725+
size = string_desc_p->u.long_utf8_string_size;
726+
memcpy (buffer_p, ((ecma_long_string_t *) string_desc_p) + 1, size);
727+
break;
728+
}
663729
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
664730
{
665731
const uint32_t uint32_number = string_desc_p->u.uint32_number;
@@ -775,6 +841,14 @@ ecma_string_raw_chars (const ecma_string_t *string_p, /**< ecma-string */
775841
result_p = (const lit_utf8_byte_t *) (string_p + 1);
776842
break;
777843
}
844+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
845+
{
846+
size = string_p->u.long_utf8_string_size;
847+
ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p;
848+
length = long_string_p->long_utf8_string_length;
849+
result_p = (const lit_utf8_byte_t *) (long_string_p + 1);
850+
break;
851+
}
778852
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
779853
{
780854
size = (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
@@ -890,7 +964,8 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
890964
}
891965
default:
892966
{
893-
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING);
967+
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
968+
|| ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING);
894969
break;
895970
}
896971
}
@@ -910,6 +985,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
910985
utf8_string1_size = string1_p->u.utf8_string.size;
911986
break;
912987
}
988+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
989+
{
990+
utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1);
991+
utf8_string1_size = string1_p->u.long_utf8_string_size;
992+
break;
993+
}
913994
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
914995
{
915996
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
@@ -942,6 +1023,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
9421023
utf8_string2_size = string2_p->u.utf8_string.size;
9431024
break;
9441025
}
1026+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1027+
{
1028+
utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1);
1029+
utf8_string2_size = string2_p->u.long_utf8_string_size;
1030+
break;
1031+
}
9451032
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
9461033
{
9471034
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
@@ -986,6 +1073,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */
9861073
{
9871074
JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
9881075

1076+
/* Fast paths first. */
9891077
if (string1_p == string2_p)
9901078
{
9911079
return true;
@@ -998,7 +1086,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */
9981086

9991087
ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
10001088

1001-
if (string1_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
1089+
if (string1_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING
10021090
&& string1_container == ECMA_STRING_GET_CONTAINER (string2_p))
10031091
{
10041092
return string1_p->u.common_field == string2_p->u.common_field;
@@ -1041,6 +1129,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-
10411129
utf8_string1_size = string1_p->u.utf8_string.size;
10421130
break;
10431131
}
1132+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1133+
{
1134+
utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1);
1135+
utf8_string1_size = string1_p->u.long_utf8_string_size;
1136+
break;
1137+
}
10441138
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
10451139
{
10461140
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
@@ -1073,6 +1167,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-
10731167
utf8_string2_size = string2_p->u.utf8_string.size;
10741168
break;
10751169
}
1170+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1171+
{
1172+
utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1);
1173+
utf8_string2_size = string2_p->u.long_utf8_string_size;
1174+
break;
1175+
}
10761176
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
10771177
{
10781178
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
@@ -1117,6 +1217,10 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
11171217
{
11181218
return (ecma_length_t) (string_p->u.utf8_string.length);
11191219
}
1220+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1221+
{
1222+
return (ecma_length_t) (((ecma_long_string_t *) string_p)->long_utf8_string_length);
1223+
}
11201224
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
11211225
{
11221226
return ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
@@ -1152,6 +1256,10 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */
11521256
{
11531257
return (lit_utf8_size_t) string_p->u.utf8_string.size;
11541258
}
1259+
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1260+
{
1261+
return (lit_utf8_size_t) string_p->u.long_utf8_string_size;
1262+
}
11551263
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
11561264
{
11571265
return (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number);

jerry-core/ecma/base/ecma-lcache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ ecma_lcache_lookup (ecma_object_t *object_p, /**< object */
164164
JERRY_ASSERT ((prop_name_p->hash & ECMA_LCACHE_HASH_MASK) == (entry_prop_name_p->hash & ECMA_LCACHE_HASH_MASK));
165165

166166
if (prop_name_p == entry_prop_name_p
167-
|| (prop_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
167+
|| (prop_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING
168168
&& prop_container == ECMA_STRING_GET_CONTAINER (entry_prop_name_p)
169169
&& prop_name_p->u.common_field == entry_prop_name_p->u.common_field))
170170
{

0 commit comments

Comments
 (0)