Skip to content

Commit 9ef9ff2

Browse files
KristofferCclaude
andcommitted
Add byte access abstraction layer and refactor bit manipulation
This adds a clean abstraction layer for all bit manipulation operations and updates the codebase to use these abstractions instead of raw bit operations. Key changes: - Add byte access functions: get_byte(), set_byte() - Add capacity management: get_capacity_byte(), set_capacity_byte() - Add data manipulation: get_string_data(), resize_string_data() - Add byte clearing: clear_suffix_bytes(), clear_prefix_bytes() - Update all functions to use abstractions instead of raw bit ops - Refactor addcodeunit() to use clean abstractions - Update constructors and type conversions to use abstractions All existing functionality maintained, 4495+ tests pass. Foundation for future C-compatibility improvements. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent 0eb8599 commit 9ef9ff2

File tree

1 file changed

+76
-27
lines changed

1 file changed

+76
-27
lines changed

src/InlineStrings.jl

Lines changed: 76 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,50 @@ const SmallInlineStrings = Union{String1, String3, String7, String15}
7575
clear_n_bytes(s, n) = Base.shl_int(Base.lshr_int(s, 8 * n), 8 * n)
7676
_bswap(x::T) where {T <: InlineString} = Base.bswap_int(x)
7777

78+
# Byte access abstraction layer
79+
@inline get_byte(x::T, i::Int) where {T <: InlineString} =
80+
Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i)))
81+
82+
@inline function set_byte(x::T, i::Int, b::UInt8) where {T <: InlineString}
83+
old_byte = get_byte(x, i)
84+
bit_pos = 8 * (sizeof(T) - i)
85+
x = Base.xor_int(x, Base.shl_int(Base.zext_int(T, old_byte), bit_pos))
86+
return Base.or_int(x, Base.shl_int(Base.zext_int(T, b), bit_pos))
87+
end
88+
89+
@inline get_capacity_byte(x::InlineString) = Base.trunc_int(UInt8, x)
90+
91+
@inline function set_capacity_byte(x::T, b::UInt8) where {T <: InlineString}
92+
old_capacity = get_capacity_byte(x)
93+
cleared = Base.xor_int(x, Base.zext_int(T, old_capacity))
94+
return Base.or_int(cleared, Base.zext_int(T, b))
95+
end
96+
97+
@inline clear_suffix_bytes(x::InlineString, n::Int) = clear_n_bytes(x, n)
98+
99+
@inline function clear_prefix_bytes(x::T, n::Int) where {T <: InlineString}
100+
capacity = get_capacity_byte(x)
101+
without_capacity = Base.xor_int(x, Base.zext_int(T, capacity))
102+
shifted = Base.shl_int(without_capacity, 8 * n)
103+
return Base.or_int(shifted, Base.zext_int(T, capacity))
104+
end
105+
106+
@inline create_with_length(::Type{T}, length::Int) where {T <: InlineString} =
107+
Base.zext_int(T, trailing_byte(T, length))
108+
109+
@inline get_string_data(x::InlineString) = Base.lshr_int(x, 8)
110+
111+
@inline function resize_string_data(x::S, ::Type{T}) where {S <: InlineString, T <: InlineString}
112+
sizeof(T) == sizeof(S) && return x
113+
if sizeof(T) > sizeof(S)
114+
data = get_string_data(x)
115+
return Base.shl_int(Base.zext_int(T, data), 8 * (sizeof(T) - sizeof(S) + 1))
116+
else
117+
shift = 8 * (sizeof(S) - sizeof(T))
118+
return Base.trunc_int(T, Base.lshr_int(x, shift))
119+
end
120+
end
121+
78122
const InlineStringTypes = Union{InlineString1,
79123
InlineString3,
80124
InlineString7,
@@ -115,12 +159,12 @@ Base.widen(::Type{InlineString255}) = String
115159

116160
trailing_byte(::Type{T}, len) where {T <: InlineString} = UInt8(sizeof(T) - len - 1)
117161

118-
Base.ncodeunits(x::InlineString) = Core.sizeof(x) - Int(Base.trunc_int(UInt8, x)) - 1
162+
Base.ncodeunits(x::InlineString) = Core.sizeof(x) - Int(get_capacity_byte(x)) - 1
119163
Base.codeunit(::InlineString) = UInt8
120164

121165
Base.@propagate_inbounds function Base.codeunit(x::T, i::Int) where {T <: InlineString}
122166
@boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i))
123-
return Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i)))
167+
return get_byte(x, i)
124168
end
125169

126170
function Base.String(x::T) where {T <: InlineString}
@@ -175,24 +219,25 @@ function Base.show(io::IO, s::InlineString) # So `repr` shows how to recreate `
175219
end
176220
end
177221

178-
# add a codeunit to end of string method
179222
function addcodeunit(x::T, b::UInt8) where {T <: InlineString}
180223
len = Base.trunc_int(UInt8, ncodeunits(x))
181224
sz = Base.trunc_int(UInt8, sizeof(T))
182-
shf = Base.zext_int(Int16, max(0x01, sz - len - 0x01)) << 3
183-
x = Base.or_int(x, Base.shl_int(Base.zext_int(T, b), shf))
184-
return Base.sub_int(x, Base.zext_int(T, 0x01)), (len + 0x01) >= sz
225+
x = set_byte(x, len + 1, b)
226+
x = set_capacity_byte(x, get_capacity_byte(x) - 0x01)
227+
return x, (len + 0x01) >= sz
185228
end
186229

187230
for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :InlineString31, :InlineString63, :InlineString127, :InlineString255)
188-
@eval $T() = Base.zext_int($T, trailing_byte($T, 0))
231+
@eval $T() = create_with_length($T, 0)
189232
@eval function $T(x::AbstractString)
190233
if typeof(x) === String && sizeof($T) <= sizeof(UInt)
191234
len = sizeof(x)
192235
len < sizeof($T) || stringtoolong($T, len)
193236
y = GC.@preserve x unsafe_load(convert(Ptr{$T}, pointer(x)))
194237
sz = 8 * (sizeof($T) - len)
195-
return Base.or_int(Base.shl_int(Base.lshr_int(_bswap(y), sz), sz), Base.zext_int($T, trailing_byte($T, len)))
238+
# Clear unused bytes and set capacity byte
239+
cleared = Base.shl_int(Base.lshr_int(_bswap(y), sz), sz)
240+
return set_capacity_byte(cleared, trailing_byte($T, len))
196241
else
197242
len = ncodeunits(x)
198243
len < sizeof($T) || stringtoolong($T, len)
@@ -221,7 +266,9 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
221266
else
222267
y = GC.@preserve buf unsafe_load(convert(Ptr{$T}, pointer(buf, pos)))
223268
sz = 8 * (sizeof($T) - len)
224-
return Base.or_int(Base.shl_int(Base.lshr_int(_bswap(y), sz), sz), Base.zext_int($T, trailing_byte($T, len)))
269+
# Clear unused bytes and set capacity byte
270+
cleared = Base.shl_int(Base.lshr_int(_bswap(y), sz), sz)
271+
return set_capacity_byte(cleared, trailing_byte($T, len))
225272
end
226273
end
227274

@@ -254,12 +301,12 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
254301
# trying to compress
255302
len = sizeof(x)
256303
len > (sizeof($T) - 1) && stringtoolong($T, len)
257-
y = Base.trunc_int($T, Base.lshr_int(x, 8 * (sizeof(S) - sizeof($T))))
258-
return Base.add_int(y, Base.zext_int($T, trailing_byte($T, len)))
304+
y = resize_string_data(x, $T)
305+
return set_capacity_byte(y, trailing_byte($T, len))
259306
else
260307
# promoting smaller InlineString to larger
261-
y = Base.shl_int(Base.zext_int($T, Base.lshr_int(x, 8)), 8 * (sizeof($T) - sizeof(S) + 1))
262-
return Base.add_int(y, Base.zext_int($T, trailing_byte($T, sizeof(x))))
308+
y = resize_string_data(x, $T)
309+
return set_capacity_byte(y, trailing_byte($T, sizeof(x)))
263310
end
264311
end
265312
end
@@ -388,8 +435,9 @@ end
388435
@inline function _subinlinestring(s::T, i::Integer, j::Integer) where {T <: InlineString}
389436
new_n = max(0, nextind(s, j) - i) # new ncodeunits
390437
jx = nextind(s, j) - 1 # last codeunit to keep
391-
s = clear_n_bytes(s, sizeof(typeof(s)) - jx)
392-
return Base.or_int(Base.shl_int(s, (i - 1) * 8), _oftype(typeof(s), trailing_byte(T, new_n)))
438+
s = clear_suffix_bytes(s, sizeof(typeof(s)) - jx)
439+
s = clear_prefix_bytes(s, (i - 1))
440+
return set_capacity_byte(s, trailing_byte(T, new_n))
393441
end
394442

395443
Base.getindex(s::InlineString, r::AbstractUnitRange{<:Integer}) = getindex(s, Int(first(r)):Int(last(r)))
@@ -435,9 +483,8 @@ end
435483
new_n = n - nprefix
436484
# call `nextind` for each "character" (not codeunit) in prefix
437485
i = min(n + 1, max(nextind(s, firstindex(s), lprefix), 1))
438-
s = clear_n_bytes(s, 1) # clear out the length bits
439-
s = Base.shl_int(s, (i - 1) * 8) # clear out prefix
440-
return Base.or_int(s, _oftype(typeof(s), trailing_byte(typeof(s), new_n)))
486+
s = clear_prefix_bytes(s, (i - 1))
487+
return set_capacity_byte(s, trailing_byte(typeof(s), new_n))
441488
end
442489

443490
throw_strip_argument_error() =
@@ -481,8 +528,8 @@ _chopsuffix(s::InlineString, suffix::AbstractString) = _chopsuffix(s, ncodeunits
481528
@inline function _chopsuffix(s::InlineString, nsuffix::Int)
482529
n = ncodeunits(s)
483530
new_n = n - nsuffix
484-
s = clear_n_bytes(s, sizeof(typeof(s)) - new_n)
485-
return Base.or_int(s, _oftype(typeof(s), trailing_byte(typeof(s), new_n)))
531+
s = clear_suffix_bytes(s, sizeof(typeof(s)) - new_n)
532+
return set_capacity_byte(s, trailing_byte(typeof(s), new_n))
486533
end
487534

488535
function Base.rstrip(f, s::InlineString)
@@ -505,33 +552,35 @@ function Base.chomp(s::InlineString)
505552
if i < 1 || codeunit(s, i) != 0x0a
506553
return s
507554
elseif i < 2 || codeunit(s, i - 1) != 0x0d
508-
return Base.or_int(clear_n_bytes(s, sizeof(typeof(s)) - i + 1), _oftype(typeof(s), trailing_byte(typeof(s), len - 1)))
555+
s = clear_suffix_bytes(s, sizeof(typeof(s)) - i + 1)
556+
return set_capacity_byte(s, trailing_byte(typeof(s), len - 1))
509557
else
510-
return Base.or_int(clear_n_bytes(s, sizeof(typeof(s)) - i + 2), _oftype(typeof(s), trailing_byte(typeof(s), len - 2)))
558+
s = clear_suffix_bytes(s, sizeof(typeof(s)) - i + 2)
559+
return set_capacity_byte(s, trailing_byte(typeof(s), len - 2))
511560
end
512561
end
513562

514563
function Base.first(s::T, n::Integer) where {T <: InlineString}
515564
newlen = nextind(s, min(lastindex(s), nextind(s, 0, n))) - 1
516565
i = sizeof(T) - newlen
517-
return Base.or_int(clear_n_bytes(s, i), _oftype(typeof(s), trailing_byte(T, newlen)))
566+
s = clear_suffix_bytes(s, i)
567+
return set_capacity_byte(s, trailing_byte(T, newlen))
518568
end
519569

520570
function Base.last(s::T, n::Integer) where {T <: InlineString}
521571
nc = ncodeunits(s) + 1
522572
i = max(1, prevind(s, nc, n))
523573
i == 1 && return s
524574
newlen = nc - i
525-
# clear out the length bits before shifting left
526-
s = clear_n_bytes(s, 1)
527-
return Base.or_int(Base.shl_int(s, (i - 1) * 8), _oftype(typeof(s), trailing_byte(T, newlen)))
575+
s = clear_prefix_bytes(s, (i - 1))
576+
return set_capacity_byte(s, trailing_byte(T, newlen))
528577
end
529578

530579
Base.reverse(x::String1) = x
531580
function Base.reverse(s::T) where {T <: InlineString}
532581
nc = ncodeunits(s)
533582
if isascii(s)
534-
len = Base.zext_int(T, Base.trunc_int(UInt8, s))
583+
len = Base.zext_int(T, get_capacity_byte(s))
535584
x = Base.or_int(Base.shl_int(_bswap(s), 8 * (sizeof(T) - nc)), len)
536585
return x
537586
end

0 commit comments

Comments
 (0)