@@ -75,6 +75,50 @@ const SmallInlineStrings = Union{String1, String3, String7, String15}
# Return `s` with its `n` least-significant bytes zeroed.
# The value is shifted right and then left again by `8n` bits, so the
# higher-order bytes stay where they were.
function clear_n_bytes(s, n)
    nbits = 8 * n
    return Base.shl_int(Base.lshr_int(s, nbits), nbits)
end
# Reverse the byte order of the bits backing an inline string.
function _bswap(x::InlineString)
    return Base.bswap_int(x)
end
7777
78+ # Byte access abstraction layer
# Return byte `i` of `x`, where bytes are numbered from the most-significant
# end: byte 1 is the highest byte and byte `sizeof(T)` is the lowest.
@inline function get_byte(x::T, i::Int) where {T <: InlineString}
    shift = 8 * (sizeof(T) - i)
    return Base.trunc_int(UInt8, Base.lshr_int(x, shift))
end
81+
# Return a copy of `x` in which byte `i` (numbered from the most-significant
# end, as in `get_byte`) has been replaced by `b`.
@inline function set_byte(x::T, i::Int, b::UInt8) where {T <: InlineString}
    shift = 8 * (sizeof(T) - i)
    # XOR-ing the slot with `old ⊻ b` turns `old` into `b` in one step and
    # leaves every other byte untouched.
    flip = Base.xor_int(get_byte(x, i), b)
    return Base.xor_int(x, Base.shl_int(Base.zext_int(T, flip), shift))
end
88+
# Read the lowest byte of `x`. `ncodeunits` derives the string length from it
# as `sizeof(x) - byte - 1`.
@inline function get_capacity_byte(x::InlineString)
    return Base.trunc_int(UInt8, x)
end
90+
# Return a copy of `x` whose lowest byte is `b`; all higher bytes are kept.
@inline function set_capacity_byte(x::T, b::UInt8) where {T <: InlineString}
    # `Base.zext_int` can only widen, so when `T` is itself one byte wide the
    # new byte is reinterpreted as `T` instead. `sizeof(T)` is fixed per
    # concrete `T`, so both arms yield a `T`.
    capacity = sizeof(T) == sizeof(UInt8) ? reinterpret(T, b) : Base.zext_int(T, b)
    # Zero the lowest byte by shifting it out and back, then drop `b` in.
    return Base.or_int(clear_n_bytes(x, 1), capacity)
end
96+
# Zero the `n` lowest bytes of `x`. This is a thin wrapper over
# `clear_n_bytes`, so for `n >= 1` the lowest byte (the one
# `get_capacity_byte` reads) is cleared as well.
@inline function clear_suffix_bytes(x::InlineString, n::Int)
    return clear_n_bytes(x, n)
end
98+
# Shift the contents of `x` left by `n` bytes, discarding its `n` highest
# bytes, while leaving the lowest byte in place and unchanged.
@inline function clear_prefix_bytes(x::T, n::Int) where {T <: InlineString}
    tail = Base.zext_int(T, get_capacity_byte(x))
    # Take the lowest byte out first so the shift cannot drag it upwards.
    body = Base.shl_int(Base.xor_int(x, tail), 8 * n)
    return Base.or_int(body, tail)
end
105+
# Build a `T` whose lowest byte is `trailing_byte(T, length)` and whose other
# bytes are all zero.
@inline function create_with_length(::Type{T}, length::Int) where {T <: InlineString}
    capacity = trailing_byte(T, length)
    return Base.zext_int(T, capacity)
end
108+
# Shift `x` right by one byte, dropping its lowest byte. The result has the
# same type as `x`.
@inline function get_string_data(x::InlineString)
    return Base.lshr_int(x, 8)
end
110+
# Move the bytes of `x` into a value of type `T`, keeping them aligned to the
# most-significant end. The lowest byte of the result is not rewritten here;
# the visible call sites follow up with `set_capacity_byte`.
@inline function resize_string_data(x::S, ::Type{T}) where {S <: InlineString, T <: InlineString}
    from = sizeof(S)
    to = sizeof(T)
    if to == from
        return x
    elseif to < from
        # Narrowing: keep only the `to` highest bytes of `x`.
        return Base.trunc_int(T, Base.lshr_int(x, 8 * (from - to)))
    end
    # Widening: drop the lowest byte, zero-extend, then push the remaining
    # bytes back up to the top of the wider type.
    widened = Base.zext_int(T, Base.lshr_int(x, 8))
    return Base.shl_int(widened, 8 * (to - from + 1))
end
121+
78122const InlineStringTypes = Union{InlineString1,
79123 InlineString3,
80124 InlineString7,
@@ -115,12 +159,12 @@ Base.widen(::Type{InlineString255}) = String
115159
# Value for the lowest byte of a `T` that holds `len` code units:
# `sizeof(T) - len - 1`, converted to `UInt8` (the conversion throws if the
# result does not fit).
function trailing_byte(::Type{T}, len) where {T <: InlineString}
    unused = sizeof(T) - len - 1
    return UInt8(unused)
end
117161
# Number of code units in `x`: its total size in bytes, minus the count held
# in the lowest byte, minus one for that byte itself.
function Base.ncodeunits(x::InlineString)
    return Core.sizeof(x) - Int(get_capacity_byte(x)) - 1
end
# The code unit type of every `InlineString` is `UInt8`.
function Base.codeunit(::InlineString)
    return UInt8
end
120164
# Return the `i`-th code unit of `x`.
# Throws `BoundsError` when `i` is out of range, unless the caller has elided
# bounds checks with `@inbounds`.
Base.@propagate_inbounds function Base.codeunit(x::T, i::Int) where {T <: InlineString}
    @boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i))
    return get_byte(x, i)
end
125169
126170function Base. String(x:: T ) where {T <: InlineString }
@@ -175,24 +219,25 @@ function Base.show(io::IO, s::InlineString) # So `repr` shows how to recreate `
175219 end
176220end
177221
# Append the code unit `b` to the end of `x`.
# Returns `(y, overflowed)`. `overflowed` is `true` when the new length would
# reach `sizeof(T)`, i.e. when `x` had no free data byte left; in that case
# the write lands on the lowest byte, so `y` should not be used as a string.
function addcodeunit(x::T, b::UInt8) where {T <: InlineString}
    len = ncodeunits(x)
    y = set_byte(x, len + 1, b)
    # One more code unit stored means one fewer unused byte recorded.
    y = set_capacity_byte(y, get_capacity_byte(y) - 0x01)
    # Compared as `Int` rather than `UInt8` so the test cannot wrap around
    # when `sizeof(T)` does not fit in a single byte.
    return y, (len + 1) >= sizeof(T)
end
186229
187230for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :InlineString31, :InlineString63, :InlineString127, :InlineString255)
188- @eval $ T() = Base . zext_int ($ T, trailing_byte( $ T, 0 ) )
231+ @eval $ T() = create_with_length ($ T, 0 )
189232 @eval function $ T(x:: AbstractString )
190233 if typeof(x) === String && sizeof($ T) <= sizeof(UInt)
191234 len = sizeof(x)
192235 len < sizeof($ T) || stringtoolong($ T, len)
193236 y = GC. @preserve x unsafe_load(convert(Ptr{$ T}, pointer(x)))
194237 sz = 8 * (sizeof($ T) - len)
195- return Base. or_int(Base. shl_int(Base. lshr_int(_bswap(y), sz), sz), Base. zext_int($ T, trailing_byte($ T, len)))
238+ # Clear unused bytes and set capacity byte
239+ cleared = Base. shl_int(Base. lshr_int(_bswap(y), sz), sz)
240+ return set_capacity_byte(cleared, trailing_byte($ T, len))
196241 else
197242 len = ncodeunits(x)
198243 len < sizeof($ T) || stringtoolong($ T, len)
@@ -221,7 +266,9 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
221266 else
222267 y = GC. @preserve buf unsafe_load(convert(Ptr{$ T}, pointer(buf, pos)))
223268 sz = 8 * (sizeof($ T) - len)
224- return Base. or_int(Base. shl_int(Base. lshr_int(_bswap(y), sz), sz), Base. zext_int($ T, trailing_byte($ T, len)))
269+ # Clear unused bytes and set capacity byte
270+ cleared = Base. shl_int(Base. lshr_int(_bswap(y), sz), sz)
271+ return set_capacity_byte(cleared, trailing_byte($ T, len))
225272 end
226273 end
227274
@@ -254,12 +301,12 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
254301 # trying to compress
255302 len = sizeof(x)
256303 len > (sizeof($ T) - 1 ) && stringtoolong($ T, len)
257- y = Base . trunc_int( $ T, Base . lshr_int( x, 8 * (sizeof(S) - sizeof( $ T))) )
258- return Base . add_int (y, Base . zext_int( $ T, trailing_byte($ T, len) ))
304+ y = resize_string_data( x, $ T )
305+ return set_capacity_byte (y, trailing_byte($ T, len))
259306 else
260307 # promoting smaller InlineString to larger
261- y = Base . shl_int(Base . zext_int( $ T, Base . lshr_int( x, 8 )), 8 * (sizeof( $ T) - sizeof(S) + 1 ) )
262- return Base . add_int (y, Base . zext_int( $ T, trailing_byte($ T, sizeof(x) )))
308+ y = resize_string_data( x, $ T )
309+ return set_capacity_byte (y, trailing_byte($ T, sizeof(x)))
263310 end
264311 end
265312end
# Return the substring of `s` that starts at code unit `i` and ends with the
# character starting at index `j`, as a value of the same type `T`.
@inline function _subinlinestring(s::T, i::Integer, j::Integer) where {T <: InlineString}
    stop = nextind(s, j)
    new_n = max(0, stop - i)  # new ncodeunits
    jx = stop - 1             # last codeunit to keep
    # Zero everything after the last kept code unit.
    s = clear_suffix_bytes(s, sizeof(T) - jx)
    # `clear_prefix_bytes` only accepts an `Int` count, while `i` may be any
    # `Integer`, so convert before shifting the prefix out.
    s = clear_prefix_bytes(s, Int(i) - 1)
    return set_capacity_byte(s, trailing_byte(T, new_n))
end
394442
395443Base. getindex(s:: InlineString , r:: AbstractUnitRange{<:Integer} ) = getindex(s, Int(first(r)): Int(last(r)))
435483 new_n = n - nprefix
436484 # call `nextind` for each "character" (not codeunit) in prefix
437485 i = min(n + 1 , max(nextind(s, firstindex(s), lprefix), 1 ))
438- s = clear_n_bytes(s, 1 ) # clear out the length bits
439- s = Base. shl_int(s, (i - 1 ) * 8 ) # clear out prefix
440- return Base. or_int(s, _oftype(typeof(s), trailing_byte(typeof(s), new_n)))
486+ s = clear_prefix_bytes(s, (i - 1 ))
487+ return set_capacity_byte(s, trailing_byte(typeof(s), new_n))
441488end
442489
443490throw_strip_argument_error() =
@@ -481,8 +528,8 @@ _chopsuffix(s::InlineString, suffix::AbstractString) = _chopsuffix(s, ncodeunits
# Return `s` with its last `nsuffix` code units removed.
@inline function _chopsuffix(s::InlineString, nsuffix::Int)
    T = typeof(s)
    new_n = ncodeunits(s) - nsuffix
    # Zero every byte below the `new_n` code units being kept, then record
    # the new length in the lowest byte.
    s = clear_suffix_bytes(s, sizeof(T) - new_n)
    return set_capacity_byte(s, trailing_byte(T, new_n))
end
487534
488535function Base. rstrip(f, s:: InlineString )
@@ -505,33 +552,35 @@ function Base.chomp(s::InlineString)
505552 if i < 1 || codeunit(s, i) != 0x0a
506553 return s
507554 elseif i < 2 || codeunit(s, i - 1 ) != 0x0d
508- return Base. or_int(clear_n_bytes(s, sizeof(typeof(s)) - i + 1 ), _oftype(typeof(s), trailing_byte(typeof(s), len - 1 )))
555+ s = clear_suffix_bytes(s, sizeof(typeof(s)) - i + 1 )
556+ return set_capacity_byte(s, trailing_byte(typeof(s), len - 1 ))
509557 else
510- return Base. or_int(clear_n_bytes(s, sizeof(typeof(s)) - i + 2 ), _oftype(typeof(s), trailing_byte(typeof(s), len - 2 )))
558+ s = clear_suffix_bytes(s, sizeof(typeof(s)) - i + 2 )
559+ return set_capacity_byte(s, trailing_byte(typeof(s), len - 2 ))
511560 end
512561end
513562
# Return the first `n` characters of `s` (or all of `s` if it is shorter) as
# a value of the same type `T`.
function Base.first(s::T, n::Integer) where {T <: InlineString}
    # Number of code units covered by the first `n` characters, capped at
    # the end of the string.
    newlen = nextind(s, min(lastindex(s), nextind(s, 0, n))) - 1
    s = clear_suffix_bytes(s, sizeof(T) - newlen)
    return set_capacity_byte(s, trailing_byte(T, newlen))
end
519569
# Return the last `n` characters of `s` (or all of `s` if it is shorter) as a
# value of the same type `T`.
function Base.last(s::T, n::Integer) where {T <: InlineString}
    nc = ncodeunits(s) + 1
    # Index of the first code unit to keep, never before the start.
    i = max(1, prevind(s, nc, n))
    i == 1 && return s
    newlen = nc - i
    # Shift the first `i - 1` code units out of the top of the value.
    s = clear_prefix_bytes(s, i - 1)
    return set_capacity_byte(s, trailing_byte(T, newlen))
end
529578
# Reversing a `String1` is a no-op: the value is returned unchanged.
function Base.reverse(x::String1)
    return x
end
531580function Base. reverse(s:: T ) where {T <: InlineString }
532581 nc = ncodeunits(s)
533582 if isascii(s)
534- len = Base. zext_int(T, Base . trunc_int(UInt8, s))
583+ len = Base. zext_int(T, get_capacity_byte( s))
535584 x = Base. or_int(Base. shl_int(_bswap(s), 8 * (sizeof(T) - nc)), len)
536585 return x
537586 end
0 commit comments