diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index 8bab471ee78d9..30326c8db0d94 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -230,20 +230,29 @@ proc genOpenArraySlice(p: BProc; q: PNode; formalType, destType: PType; prepareF of tyString, tySequence: let atyp = skipTypes(a.t, abstractInst) if formalType.skipTypes(abstractInst).kind in {tyVar} and atyp.kind == tyString and - optSeqDestructors in p.config.globalOptions: + optSeqDestructors in p.config.globalOptions and not p.config.isDefined("nimsso"): let bra = byRefLoc(p, a) p.s(cpsStmts).addCallStmt(cgsymValue(p.module, "nimPrepareStrMutationV2"), bra) - var val: Snippet - if atyp.kind in {tyVar} and not compileToCpp(p.module): - val = cDeref(ra) + if p.config.isDefined("nimsso") and + skipTypes(a.t, abstractVar + abstractInst).kind == tyString: + let strPtr = if atyp.kind in {tyVar} and not compileToCpp(p.module): ra + else: addrLoc(p.config, a) + result = ( + cCast(ptrType(dest), cOp(Add, NimInt, + cCall(cgsymValue(p.module, "nimStrData"), strPtr), rb)), + lengthExpr) else: - val = ra - result = ( - cIfExpr(dataFieldAccessor(p, val), - cCast(ptrType(dest), cOp(Add, NimInt, dataField(p, val), rb)), - NimNil), - lengthExpr) + var val: Snippet + if atyp.kind in {tyVar} and not compileToCpp(p.module): + val = cDeref(ra) + else: + val = ra + result = ( + cIfExpr(dataFieldAccessor(p, val), + cCast(ptrType(dest), cOp(Add, NimInt, dataField(p, val), rb)), + NimNil), + lengthExpr) else: result = ("", "") internalError(p.config, "openArrayLoc: " & typeToString(a.t)) @@ -287,11 +296,22 @@ proc openArrayLoc(p: BProc, formalType: PType, n: PNode; result: var Builder) = of tyString, tySequence: let ntyp = skipTypes(n.typ, abstractInst) if formalType.skipTypes(abstractInst).kind in {tyVar} and ntyp.kind == tyString and - optSeqDestructors in p.config.globalOptions: + optSeqDestructors in p.config.globalOptions and not p.config.isDefined("nimsso"): let bra = byRefLoc(p, a) 
p.s(cpsStmts).addCallStmt(cgsymValue(p.module, "nimPrepareStrMutationV2"), bra) - if ntyp.kind in {tyVar} and not compileToCpp(p.module): + if p.config.isDefined("nimsso") and + skipTypes(n.typ, abstractVar + abstractInst).kind == tyString: + if ntyp.kind in {tyVar} and not compileToCpp(p.module): + let ra = a.rdLoc + result.add(cCall(cgsymValue(p.module, "nimStrData"), ra)) + result.addArgumentSeparator() + result.add(cCall(cgsymValue(p.module, "nimStrLen"), cDeref(ra))) + else: + result.add(cCall(cgsymValue(p.module, "nimStrData"), addrLoc(p.config, a))) + result.addArgumentSeparator() + result.add(lenExpr(p, a)) + elif ntyp.kind in {tyVar} and not compileToCpp(p.module): let ra = a.rdLoc var t = TLoc(snippet: cDeref(ra)) let lt = lenExpr(p, t) @@ -315,9 +335,14 @@ proc openArrayLoc(p: BProc, formalType: PType, n: PNode; result: var Builder) = let ra = a.rdLoc var t = TLoc(snippet: cDeref(ra)) let lt = lenExpr(p, t) - result.add(cIfExpr(dataFieldAccessor(p, t.snippet), dataField(p, t.snippet), NimNil)) - result.addArgumentSeparator() - result.add(lt) + if p.config.isDefined("nimsso"): + result.add(cCall(cgsymValue(p.module, "nimStrData"), ra)) + result.addArgumentSeparator() + result.add(cCall(cgsymValue(p.module, "nimStrLen"), t.snippet)) + else: + result.add(cIfExpr(dataFieldAccessor(p, t.snippet), dataField(p, t.snippet), NimNil)) + result.addArgumentSeparator() + result.add(lt) of tyArray: let ra = rdLoc(a) result.add(ra) @@ -344,7 +369,8 @@ proc expressionsNeedsTmp(p: BProc, a: TLoc): TLoc = proc genArgStringToCString(p: BProc, n: PNode; result: var Builder; needsTmp: bool) {.inline.} = var a = initLocExpr(p, n[0]) - let ra = withTmpIfNeeded(p, a, needsTmp).rdLoc + let tmp = withTmpIfNeeded(p, a, needsTmp) + let ra = if p.config.isDefined("nimsso"): addrLoc(p.config, tmp) else: tmp.rdLoc result.addCall(cgsymValue(p.module, "nimToCStringConv"), ra) proc genArg(p: BProc, n: PNode, param: PSym; call: PNode; result: var Builder; needsTmp = false) = diff --git 
a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 9f2ac2ff9ba8f..5cb4a00af1260 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -320,12 +320,16 @@ proc genOpenArrayConv(p: BProc; d: TLoc; a: TLoc; flags: TAssignmentFlags) = p.s(cpsStmts).addCallStmt( cgsymValue(p.module, "nimPrepareStrMutationV2"), bra) - let rd = d.rdLoc - let ra = a.rdLoc - p.s(cpsStmts).addFieldAssignment(rd, "Field0", - cIfExpr(dataFieldAccessor(p, ra), dataField(p, ra), NimNil)) let la = lenExpr(p, a) + if p.config.isDefined("nimsso"): + let bra = byRefLoc(p, a) + p.s(cpsStmts).addFieldAssignment(rd, "Field0", + cCall(cgsymValue(p.module, "nimStrData"), bra)) + else: + let ra = a.rdLoc + p.s(cpsStmts).addFieldAssignment(rd, "Field0", + cIfExpr(dataFieldAccessor(p, ra), dataField(p, ra), NimNil)) p.s(cpsStmts).addFieldAssignment(rd, "Field1", la) else: internalError(p.config, a.lode.info, "cannot handle " & $a.t.kind) @@ -958,7 +962,8 @@ proc genDeref(p: BProc, e: PNode, d: var TLoc) = putIntoDest(p, d, e, cDeref(rdLoc(a)), a.storage) proc cowBracket(p: BProc; n: PNode) = - if n.kind == nkBracketExpr and optSeqDestructors in p.config.globalOptions: + if n.kind == nkBracketExpr and optSeqDestructors in p.config.globalOptions and + not p.config.isDefined("nimsso"): let strCandidate = n[0] if strCandidate.typ.skipTypes(abstractInst).kind == tyString: var a: TLoc = initLocExpr(p, strCandidate) @@ -984,7 +989,9 @@ proc genAddr(p: BProc, e: PNode, d: var TLoc) = # bug #19497 d.lode = e else: - var a: TLoc = initLocExpr(p, e[0]) + let ssoStrSub = p.config.isDefined("nimsso") and e[0].kind == nkBracketExpr and + e[0][0].typ.skipTypes(abstractVar).kind == tyString + var a: TLoc = initLocExpr(p, e[0], if ssoStrSub: {lfEnforceDeref, lfPrepareForMutation} else: {}) if e[0].kind in {nkHiddenStdConv, nkHiddenSubConv, nkConv} and not ignoreConv(e[0]): # addr (conv x) introduces a temp because `conv x` is not a rvalue # transform addr ( conv ( x ) ) -> conv ( addr ( x ) ) @@ -1311,13 
+1318,24 @@ proc genSeqElem(p: BProc, n, x, y: PNode, d: var TLoc) = if skipTypes(a.t, abstractVar).kind in {tyRef, tyPtr}: a.snippet = cDeref(a.snippet) - if lfPrepareForMutation in d.flags and ty.kind == tyString and - optSeqDestructors in p.config.globalOptions: + if p.config.isDefined("nimsso") and ty.kind == tyString: let bra = byRefLoc(p, a) - p.s(cpsStmts).addCallStmt(cgsymValue(p.module, "nimPrepareStrMutationV2"), - bra) - let ra = rdLoc(a) - putIntoDest(p, d, n, subscript(dataField(p, ra), rcb), a.storage) + if lfPrepareForMutation in d.flags: + # Use nimStrAtMutV3 to get a mutable reference (char*) to the element. + # Only when mutation is requested: avoids calling nimPrepareStrMutationV2 + # on const string literals (which would SIGSEGV on write to read-only memory). + putIntoDest(p, d, n, + cDeref(cCall(cgsymValue(p.module, "nimStrAtMutV3"), bra, rcb)), a.storage) + else: + putIntoDest(p, d, n, + cCall(cgsymValue(p.module, "nimStrAtV3"), bra, rcb), a.storage) + else: + if lfPrepareForMutation in d.flags and ty.kind == tyString and + optSeqDestructors in p.config.globalOptions: + let bra = byRefLoc(p, a) + p.s(cpsStmts).addCallStmt(cgsymValue(p.module, "nimPrepareStrMutationV2"), bra) + let ra = rdLoc(a) + putIntoDest(p, d, n, subscript(dataField(p, ra), rcb), a.storage) proc genBracketExpr(p: BProc; n: PNode; d: var TLoc) = var ty = skipTypes(n[0].typ, abstractVarRange + tyUserTypeClasses) @@ -2124,12 +2142,20 @@ proc genRepr(p: BProc, e: PNode, d: var TLoc) = let ra = rdLoc(a) putIntoDest(p, b, e, ra & cArgumentSeparator & ra & "Len_0", a.storage) of tyString, tySequence: - let ra = rdLoc(a) let la = lenExpr(p, a) - putIntoDest(p, b, e, - cIfExpr(dataFieldAccessor(p, ra), dataField(p, ra), NimNil) & - cArgumentSeparator & la, - a.storage) + if p.config.isDefined("nimsso") and + skipTypes(a.t, abstractVarRange).kind == tyString: + let bra = byRefLoc(p, a) + putIntoDest(p, b, e, + cCall(cgsymValue(p.module, "nimStrData"), bra) & + cArgumentSeparator & 
la, + a.storage) + else: + let ra = rdLoc(a) + putIntoDest(p, b, e, + cIfExpr(dataFieldAccessor(p, ra), dataField(p, ra), NimNil) & + cArgumentSeparator & la, + a.storage) of tyArray: let ra = rdLoc(a) let la = cIntValue(lengthOrd(p.config, a.t)) @@ -2710,9 +2736,9 @@ proc genConv(p: BProc, e: PNode, d: var TLoc) = proc convStrToCStr(p: BProc, n: PNode, d: var TLoc) = var a: TLoc = initLocExpr(p, n[0]) + let arg = if p.config.isDefined("nimsso"): addrLoc(p.config, a) else: rdLoc(a) putIntoDest(p, d, n, - cgCall(p, "nimToCStringConv", rdLoc(a)), -# "($1 ? $1->data : (NCSTRING)\"\")" % [a.rdLoc], + cgCall(p, "nimToCStringConv", arg), a.storage) proc convCStrToStr(p: BProc, n: PNode, d: var TLoc) = @@ -2783,19 +2809,25 @@ proc genWasMoved(p: BProc; n: PNode) = # [addrLoc(p.config, a), getTypeDesc(p.module, a.t)]) proc genMove(p: BProc; n: PNode; d: var TLoc) = - var a: TLoc = initLocExpr(p, n[1].skipAddr, {lfEnforceDeref}) + var a: TLoc = initLocExpr(p, n[1].skipAddr, {lfEnforceDeref, lfPrepareForMutation}) if n.len == 4: # generated by liftdestructors: var src: TLoc = initLocExpr(p, n[2]) let destVal = rdLoc(a) let srcVal = rdLoc(src) - p.s(cpsStmts).addSingleIfStmt( - cOp(NotEqual, - dotField(destVal, "p"), - dotField(srcVal, "p"))): + if p.config.isDefined("nimsso") and + n[1].typ.skipTypes(abstractVar).kind == tyString: + # SmallString: destroy dst then struct-copy src; no .p field aliasing needed genStmts(p, n[3]) - p.s(cpsStmts).addFieldAssignment(destVal, "len", dotField(srcVal, "len")) - p.s(cpsStmts).addFieldAssignment(destVal, "p", dotField(srcVal, "p")) + genAssignment(p, a, src, {}) + else: + p.s(cpsStmts).addSingleIfStmt( + cOp(NotEqual, + dotField(destVal, "p"), + dotField(srcVal, "p"))): + genStmts(p, n[3]) + p.s(cpsStmts).addFieldAssignment(destVal, "len", dotField(srcVal, "len")) + p.s(cpsStmts).addFieldAssignment(destVal, "p", dotField(srcVal, "p")) else: if d.k == locNone: d = getTemp(p, n.typ) if p.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, 
gcYrc}: @@ -2832,15 +2864,19 @@ proc genDestroy(p: BProc; n: PNode) = case t.kind of tyString: var a: TLoc = initLocExpr(p, arg) - let ra = rdLoc(a) - let rp = dotField(ra, "p") - p.s(cpsStmts).addSingleIfStmt( - cOp(And, rp, - cOp(Not, cOp(BitAnd, NimInt, - derefField(rp, "cap"), - NimStrlitFlag)))): - let fn = if optThreads in p.config.globalOptions: "deallocShared" else: "dealloc" - p.s(cpsStmts).addCallStmt(cgsymValue(p.module, fn), rp) + if p.config.isDefined("nimsso"): + # SmallString: delegate to nimDestroyStrV1 (rc-based, handles static strings) + p.s(cpsStmts).addCallStmt(cgsymValue(p.module, "nimDestroyStrV1"), rdLoc(a)) + else: + let ra = rdLoc(a) + let rp = dotField(ra, "p") + p.s(cpsStmts).addSingleIfStmt( + cOp(And, rp, + cOp(Not, cOp(BitAnd, NimInt, + derefField(rp, "cap"), + NimStrlitFlag)))): + let fn = if optThreads in p.config.globalOptions: "deallocShared" else: "dealloc" + p.s(cpsStmts).addCallStmt(cgsymValue(p.module, fn), rp) of tySequence: var a: TLoc = initLocExpr(p, arg) let ra = rdLoc(a) @@ -4200,7 +4236,10 @@ proc genBracedInit(p: BProc, n: PNode; isConst: bool; optionalType: PType; resul genConstObjConstr(p, n, isConst, result) of tyString, tyCstring: if optSeqDestructors in p.config.globalOptions and n.kind != nkNilLit and ty == tyString: - genStringLiteralV2Const(p.module, n, isConst, result) + if p.config.isDefined("nimsso"): + genStringLiteralV3Const(p.module, n, isConst, result) + else: + genStringLiteralV2Const(p.module, n, isConst, result) else: var d: TLoc = initLocExpr(p, n) result.add rdLoc(d) diff --git a/compiler/ccgliterals.nim b/compiler/ccgliterals.nim index a1ad3ae047ed4..54823cc59286e 100644 --- a/compiler/ccgliterals.nim +++ b/compiler/ccgliterals.nim @@ -22,7 +22,11 @@ template detectVersion(field, corename) = result = 1 proc detectStrVersion(m: BModule): int = - detectVersion(strVersion, "nimStrVersion") + if m.g.config.isDefined("nimsso") and + m.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc, gcHooks}: + 
result = 3 + else: + detectVersion(strVersion, "nimStrVersion") proc detectSeqVersion(m: BModule): int = detectVersion(seqVersion, "nimSeqVersion") @@ -128,6 +132,192 @@ proc genStringLiteralV2Const(m: BModule; n: PNode; isConst: bool; result: var Bu result.addField(strInit, name = "p"): result.add(cCast(ptrType("NimStrPayload"), cAddr(pureLit))) +proc ssoCharLit(ch: char): string = + ## Return a C char literal for ch, with proper escaping. + const hexDigits = "0123456789abcdef" + result = "'" + case ch + of '\'': result.add("\\'") + of '\\': result.add("\\\\") + of '\0': result.add("\\0") + of '\n': result.add("\\n") + of '\r': result.add("\\r") + of '\t': result.add("\\t") + elif ch.ord < 32 or ch.ord == 127: + result.add("\\x") + result.add(hexDigits[ch.ord shr 4]) + result.add(hexDigits[ch.ord and 0xf]) + else: + result.add(ch) + result.add('\'') + +proc ssoBytesLit(m: BModule; s: string; slen: int): string = + ## Compute the `bytes` field value for the new SmallString layout. + ## byte 0 = slen, bytes 1-7 = inline chars 0-6 (zero-padded). + ## On LE: slen in bits 0-7, char[i] in bits (i+1)*8..(i+1)*8+7. + ## On BE: slen in bits 56-63, char[i] in bits (6-i)*8..(6-i)*8+7. + const AlwaysAvail = 7 + var val: uint64 + if CPU[m.g.config.target.targetCPU].endian == littleEndian: + val = uint64(slen) + for i in 0.. 
0: - copyMem(buf, addr(s.s[s.rd]), result) + copyMem(buf, readRawData(s.s, s.rd), result) inc(s.rd, result) proc llStreamRead*(s: PLLStream, buf: pointer, bufLen: int): int = @@ -173,7 +173,7 @@ proc llStreamRead*(s: PLLStream, buf: pointer, bufLen: int): int = of llsString: result = min(bufLen, s.s.len - s.rd) if result > 0: - copyMem(buf, addr(s.s[0 + s.rd]), result) + copyMem(buf, readRawData(s.s, s.rd), result) inc(s.rd, result) of llsFile: result = readBuffer(s.f, buf, bufLen) diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim index 1efd97b244e45..e36192a6e3d4d 100644 --- a/lib/pure/lexbase.nim +++ b/lib/pure/lexbase.nim @@ -65,7 +65,9 @@ proc fillBuffer(L: var BaseLexer) = L.buf[i] = L.buf[L.sentinel + 1 + i] else: # "moveMem" handles overlapping regions - moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy) + let p = beginStore(L.buf, L.buf.len) + moveMem(p, addr p[L.sentinel + 1], toCopy) + endStore(L.buf) charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1) s = toCopy + charsRead if charsRead < L.sentinel + 1: diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index 5718efb51c64e..502358a85dce5 100644 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -921,7 +921,7 @@ elif not defined(useNimRtl): for key, val in pairs(t): var x = key & "=" & val result[i] = cast[cstring](alloc(x.len+1)) - copyMem(result[i], addr(x[0]), x.len+1) + copyMem(result[i], x.cstring, x.len+1) inc(i) proc envToCStringArray(): cstringArray = @@ -932,7 +932,7 @@ elif not defined(useNimRtl): for key, val in envPairs(): var x = key & "=" & val result[i] = cast[cstring](alloc(x.len+1)) - copyMem(result[i], addr(x[0]), x.len+1) + copyMem(result[i], x.cstring, x.len+1) inc(i) type diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim index 7d422ff4fef0c..a1fffa5d95038 100644 --- a/lib/pure/streams.nim +++ b/lib/pure/streams.nim @@ -259,10 +259,8 @@ proc readDataStr*(s: Stream, buffer: var string, slice: Slice[int]): int = result = 
s.readDataStrImpl(s, buffer, slice) else: # fallback - when declared(prepareMutation): - # buffer might potentially be a CoW literal with ARC - prepareMutation(buffer) - result = s.readData(addr buffer[slice.a], slice.b + 1 - slice.a) + result = s.readData(beginStore(buffer, slice.b + 1 - slice.a, slice.a), slice.b + 1 - slice.a) + endStore(buffer) template jsOrVmBlock(caseJsOrVm, caseElse: untyped): untyped = when nimvm: @@ -1228,7 +1226,8 @@ else: # after 1.3 or JS not defined jsOrVmBlock: buffer[slice.a.. 0: - let found = c_memchr(s[start].unsafeAddr, cint(sub), cast[csize_t](length)) + let sdata = readRawData(s) + let found = c_memchr(addr sdata[start], cint(sub), cast[csize_t](length)) if not found.isNil: - return cast[int](found) -% cast[int](s.cstring) + return cast[int](found) -% cast[int](sdata) else: findImpl() @@ -2041,9 +2042,10 @@ func find*(s, sub: string, start: Natural = 0, last = -1): int {.rtl, when declared(memmem): let subLen = sub.len if last < 0 and start < s.len and subLen != 0: - let found = memmem(s[start].unsafeAddr, csize_t(s.len - start), sub.cstring, csize_t(subLen)) + let sdata = readRawData(s) + let found = memmem(addr sdata[start], csize_t(s.len - start), readRawData(sub), csize_t(subLen)) result = if not found.isNil: - cast[int](found) -% cast[int](s.cstring) + cast[int](found) -% cast[int](sdata) else: -1 else: diff --git a/lib/std/formatfloat.nim b/lib/std/formatfloat.nim index 767de111b5b57..44f745c264004 100644 --- a/lib/std/formatfloat.nim +++ b/lib/std/formatfloat.nim @@ -19,7 +19,12 @@ proc addCstringN(result: var string, buf: cstring; buflen: int) = let oldLen = result.len let newLen = oldLen + buflen result.setLen newLen - c_memcpy(result[oldLen].addr, buf, buflen.csize_t) + {.cast(noSideEffect).}: + when declared(completeStore): + c_memcpy(beginStore(result, buflen, oldLen), buf, buflen.csize_t) + endStore(result) + else: + discard c_memcpy(result[oldLen].addr, buf, buflen.csize_t) import std/private/[dragonbox, schubfach] 
diff --git a/lib/std/private/digitsutils.nim b/lib/std/private/digitsutils.nim index 73b28a68ba8d2..8b6fc1b4e8323 100644 --- a/lib/std/private/digitsutils.nim +++ b/lib/std/private/digitsutils.nim @@ -52,7 +52,7 @@ func addChars[T](result: var string, x: T, start: int, n: int) {.inline, enforce for i in 0.. 0 and line[last-1] == '\c': line.setLen(last-1) return last > 1 or fgetsSuccess @@ -564,7 +566,8 @@ proc readAllBuffer(file: File): string = result = "" var buffer = newString(BufSize) while true: - var bytesRead = readBuffer(file, addr(buffer[0]), BufSize) + var bytesRead = readBuffer(file, beginStore(buffer, BufSize), BufSize) + endStore(buffer) if bytesRead == BufSize: result.add(buffer) else: @@ -590,7 +593,8 @@ proc readAllFile(file: File, len: int64): string = # We acquire the filesize beforehand and hope it doesn't change. # Speeds things up. result = newString(len) - let bytes = readBuffer(file, addr(result[0]), len) + let bytes = readBuffer(file, beginStore(result, len.int), len.int) + endStore(result) if endOfFile(file): if bytes.int64 < len: result.setLen(bytes) diff --git a/lib/system.nim b/lib/system.nim index 306818ffa0b0b..28a0415a126f2 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -1622,26 +1622,29 @@ when notJSnotNims: include system/sysmem when notJSnotNims and defined(nimSeqsV2): - const nimStrVersion {.core.} = 2 + when defined(nimsso): + const nimStrVersion {.core.} = 3 + else: + const nimStrVersion {.core.} = 2 - type - NimStrPayloadBase = object - cap: int + type + NimStrPayloadBase = object + cap: int - NimStrPayload {.core.} = object - cap: int - data: UncheckedArray[char] + NimStrPayload {.core.} = object + cap: int + data: UncheckedArray[char] - NimStringV2 {.core.} = object - len: int - p: ptr NimStrPayload ## can be nil if len == 0. + NimStringV2 {.core.} = object + len: int + p: ptr NimStrPayload ## can be nil if len == 0. 
when defined(windows): proc GetLastError(): int32 {.header: "", nodecl.} const ERROR_BAD_EXE_FORMAT = 193 when notJSnotNims: - when defined(nimSeqsV2): + when defined(nimSeqsV2) and not defined(nimsso): proc nimToCStringConv(s: NimStringV2): cstring {.compilerproc, nonReloadable, inline.} when hostOS != "standalone" and hostOS != "any": @@ -1689,9 +1692,32 @@ when not defined(nimIcIntegrityChecks): export exceptions when notJSnotNims and defined(nimSeqsV2): - include "system/strs_v2" + when defined(nimsso): + include "system/strs_v3" + else: + include "system/strs_v2" include "system/seqs_v2" +when not (notJSnotNims and defined(nimSeqsV2)): + # Fallback implementations for backends where strs_v2/v3 is not included. + # Needed so modules imported by system (e.g. syncio) can reference these without guards. + when notJSnotNims: + # mm:refc: string = ptr NimStringDesc with data: UncheckedArray[char] + proc beginStore*(s: var string; ensuredLen: int; start = 0): ptr UncheckedArray[char] {.inline, noSideEffect, raises: [], tags: [].} = + let ns = cast[NimString](s) + if ns == nil: nil + else: cast[ptr UncheckedArray[char]](addr ns.data[start]) + proc endStore*(s: var string) {.inline, noSideEffect, raises: [], tags: [].} = discard + template readRawData*(s: string; start = 0): ptr UncheckedArray[char] = + let ns = cast[NimString](s) + if ns == nil: nil + else: cast[ptr UncheckedArray[char]](addr ns.data[start]) + else: + # JS/nimscript: callers are guarded by whenNotVmJsNims/when not defined(js) + proc beginStore*(s: var string; ensuredLen: int; start = 0): ptr UncheckedArray[char] {.inline, noSideEffect, raises: [], tags: [].} = nil + proc endStore*(s: var string) {.inline, noSideEffect, raises: [], tags: [].} = discard + template readRawData*(s: string; start = 0): ptr UncheckedArray[char] = nil + when not defined(js): template newSeqImpl(T, len) = result = newSeqOfCap[T](len) @@ -1741,6 +1767,9 @@ when not defined(js): else: {.error: "The type T cannot contain managed 
memory or have destructors".} + when defined(nimsso) and not declared(newStringUninitWasDeclared): + proc newStringUninitImpl(len: Natural): string {.noSideEffect, inline.} + proc newStringUninit*(len: Natural): string {.noSideEffect.} = ## Returns a new string of length `len` but with uninitialized ## content. One needs to fill the string character after character @@ -1751,17 +1780,20 @@ when not defined(js): when nimvm: result = newString(len) else: - result = newStringOfCap(len) - {.cast(noSideEffect).}: - when defined(nimSeqsV2): - let s = cast[ptr NimStringV2](addr result) - if len > 0: + when defined(nimsso): + result = newStringUninitImpl(len) + else: + result = newStringOfCap(len) + {.cast(noSideEffect).}: + when defined(nimSeqsV2): + let s = cast[ptr NimStringV2](addr result) + if len > 0: + s.len = len + s.p.data[len] = '\0' + else: + let s = cast[NimString](result) s.len = len - s.p.data[len] = '\0' - else: - let s = cast[NimString](result) - s.len = len - s.data[len] = '\0' + s.data[len] = '\0' else: proc newStringUninit*(len: Natural): string {. 
magic: "NewString", importc: "mnewString", noSideEffect.} @@ -2244,10 +2276,13 @@ when not defined(js) or defined(nimscript): else: result = 0 else: when not defined(nimscript): # avoid semantic checking - let minlen = min(x.len, y.len) - result = int(nimCmpMem(x.cstring, y.cstring, cast[csize_t](minlen))) - if result == 0: - result = x.len - y.len + when defined(nimsso): + result = cmpStrings(x, y) + else: + let minlen = min(x.len, y.len) + result = int(nimCmpMem(x.cstring, y.cstring, cast[csize_t](minlen))) + if result == 0: + result = x.len - y.len when declared(newSeq): proc cstringArrayToSeq*(a: cstringArray, len: Natural): seq[string] = @@ -2913,7 +2948,9 @@ proc substr*(a: openArray[char]): string = result = newStringUninit(a.len) whenNotVmJsNims(): if a.len > 0: - copyMem(result[0].addr, a[0].unsafeAddr, a.len) + {.cast(noSideEffect).}: + copyMem(beginStore(result, a.len), a[0].unsafeAddr, a.len) + endStore(result) do: for i, ch in a: result[i] = ch @@ -2948,7 +2985,8 @@ proc substr*(s: string; first, last: int): string = # A bug with `magic: Slice` result = newStringUninit(L) whenNotVmJsNims(): if L > 0: - copyMem(result[0].addr, s[first].unsafeAddr, L) + copyMem(beginStore(result, L), readRawData(s, first), L) + endStore(result) do: for i in 0.. 0: - result = c_memcmp(unsafeAddr a[0], unsafeAddr b[0], cast[csize_t](minlen)).int - if result == 0: +when not defined(nimsso): + proc cmpStrings(a, b: string): int {.inline, compilerproc.} = + let alen = a.len + let blen = b.len + let minlen = min(alen, blen) + if minlen > 0: + result = c_memcmp(unsafeAddr a[0], unsafeAddr b[0], cast[csize_t](minlen)).int + if result == 0: + result = alen - blen + else: result = alen - blen - else: - result = alen - blen - -proc leStrings(a, b: string): bool {.inline, compilerproc.} = - # required by upcoming backends (NIR). - cmpStrings(a, b) <= 0 - -proc ltStrings(a, b: string): bool {.inline, compilerproc.} = - # required by upcoming backends (NIR). 
- cmpStrings(a, b) < 0 - -proc eqStrings(a, b: string): bool {.inline, compilerproc.} = - result = false - let alen = a.len - let blen = b.len - if alen == blen: - if alen == 0: return true - return equalMem(unsafeAddr(a[0]), unsafeAddr(b[0]), alen) - -proc hashString(s: string): int {.compilerproc.} = - # the compiler needs exactly the same hash function! - # this used to be used for efficient generation of string case statements - var h = 0'u - for i in 0..len(s)-1: - h = h + uint(s[i]) - h = h + h shl 10 - h = h xor (h shr 6) - h = h + h shl 3 - h = h xor (h shr 11) - h = h + h shl 15 - result = cast[int](h) + + proc leStrings(a, b: string): bool {.inline, compilerproc.} = + # required by upcoming backends (NIR). + cmpStrings(a, b) <= 0 + + proc ltStrings(a, b: string): bool {.inline, compilerproc.} = + # required by upcoming backends (NIR). + cmpStrings(a, b) < 0 + + proc eqStrings(a, b: string): bool {.inline, compilerproc.} = + result = false + let alen = a.len + let blen = b.len + if alen == blen: + if alen == 0: return true + return equalMem(unsafeAddr(a[0]), unsafeAddr(b[0]), alen) + + proc hashString(s: string): int {.compilerproc.} = + # the compiler needs exactly the same hash function! 
+ # this used to be used for efficient generation of string case statements + var h = 0'u + for i in 0..len(s)-1: + h = h + uint(s[i]) + h = h + h shl 10 + h = h xor (h shr 6) + h = h + h shl 3 + h = h xor (h shr 11) + h = h + h shl 15 + result = cast[int](h) proc eqCstrings(a, b: cstring): bool {.inline, compilerproc.} = if pointer(a) == pointer(b): result = true diff --git a/lib/system/strs_v2.nim b/lib/system/strs_v2.nim index 9861c9ae4e434..6942b69a6d5d4 100644 --- a/lib/system/strs_v2.nim +++ b/lib/system/strs_v2.nim @@ -176,18 +176,18 @@ proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} = a.len = b.len copyMem(unsafeAddr a.p.data[0], unsafeAddr b.p.data[0], b.len+1) -proc nimPrepareStrMutationImpl(s: var NimStringV2) = +proc nimPrepareStrMutationImpl(s: var NimStringV2) {.raises: [], tags: [].} = let oldP = s.p # can't mutate a literal, so we need a fresh copy here: s.p = allocPayload(s.len) s.p.cap = s.len copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], s.len+1) -proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inl.} = +proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inl, raises: [], tags: [].} = if s.p != nil and (s.p.cap and strlitFlag) == strlitFlag: nimPrepareStrMutationImpl(s) -proc prepareMutation*(s: var string) {.inline.} = +proc prepareMutation*(s: var string) {.inline, raises: [], tags: [].} = # string literals are "copy on write", so you need to call # `prepareMutation` before modifying the strings via `addr`. {.cast(noSideEffect).}: @@ -216,4 +216,25 @@ func capacity*(self: string): int {.inline.} = let str = cast[ptr NimStringV2](unsafeAddr self) result = if str.p != nil: str.p.cap and not strlitFlag else: 0 +proc beginStore*(s: var string; ensuredLen: int; start = 0): ptr UncheckedArray[char] {.inline, noSideEffect, raises: [], tags: [].} = + ## Returns a writable pointer for bulk write of `ensuredLen` bytes starting at `start`. + ## Call `endStore(s)` afterwards for portability. 
+ {.cast(noSideEffect).}: prepareMutation(s) + let str = cast[ptr NimStringV2](unsafeAddr s) + if str.p == nil: nil + else: cast[ptr UncheckedArray[char]](addr str.p.data[start]) + +proc endStore*(s: var string) {.inline, noSideEffect, raises: [], tags: [].} = + ## No-op for non-SSO strings; call after bulk writes via `beginStore`. + discard + +proc rawDataImpl(str: ptr NimStringV2; start: int): ptr UncheckedArray[char] {.inline, noSideEffect, raises: [], tags: [].} = + if str.p == nil: nil + else: cast[ptr UncheckedArray[char]](addr str.p.data[start]) + +template readRawData*(s: string; start = 0): ptr UncheckedArray[char] = + ## Returns a pointer to `s[start]` for read-only raw access. + ## Template ensures no copy of `s`; ptr is valid while `s` is alive. + rawDataImpl(cast[ptr NimStringV2](unsafeAddr s), start) + {.pop.} diff --git a/lib/system/strs_v3.nim b/lib/system/strs_v3.nim new file mode 100644 index 0000000000000..ef69aae679f4c --- /dev/null +++ b/lib/system/strs_v3.nim @@ -0,0 +1,743 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2026 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Small String Optimization (SSO) implementation used by Nim's core. 
+ +const + AlwaysAvail = sizeof(uint) - 1 # inline chars that fit in the `bytes` field alongside slen + PayloadSize = AlwaysAvail + sizeof(pointer) - 1 # -1 reserves the last byte for '\0' + HeapSlen = 255 # slen sentinel: heap-allocated long string; capImpl = raw capacity + StaticSlen = 254 # slen sentinel: static/literal long string; capImpl = 0, never freed + LongStringDataOffset = 3 * sizeof(int) # byte offset of LongString.data from struct start + +when false: + proc atomicAddFetch(p: var int; v: int): int {.importc: "__sync_add_and_fetch", nodecl.} + proc atomicSubFetch(p: var int; v: int): int {.importc: "__sync_sub_and_fetch", nodecl.} +else: + proc atomicAddFetch(p: var int; v: int): int {.inline.} = + result = p + v + p = result + proc atomicSubFetch(p: var int; v: int): int {.inline.} = + result = p - v + p = result + +type + LongString {.core.} = object + fullLen: int + rc: int # atomic reference count; 1 = unique owner + capImpl: int # raw capacity; 0 for static literals (never freed, slen = StaticSlen) + data: UncheckedArray[char] + + SmallString {.core.} = object + bytes: uint + ## Layout (little-endian): byte 0 = slen; bytes 1..AlwaysAvail = inline chars 0..AlwaysAvail-1. + ## Bytes after the null terminator are zero (SWAR invariant). + ## When slen == HeapSlen (255), `more` is a heap-owned LongString block. + ## When slen == StaticSlen (254), `more` points to a static LongString literal. + ## When AlwaysAvail < slen <= PayloadSize, `more` holds raw char bytes AlwaysAvail..PayloadSize-1 (medium string). 
+ more: ptr LongString + +when sizeof(uint) == 8: + proc bswap(x: uint): uint {.importc: "__builtin_bswap64", nodecl, noSideEffect.} + proc ctzImpl(x: uint): int {.inline.} = + proc ctz64(x: uint64): int32 {.importc: "__builtin_ctzll", nodecl, noSideEffect.} + int(ctz64(uint64(x))) +else: + proc bswap(x: uint): uint {.importc: "__builtin_bswap32", nodecl, noSideEffect.} + proc ctzImpl(x: uint): int {.inline.} = + proc ctz32(x: uint32): int32 {.importc: "__builtin_ctz", nodecl, noSideEffect.} + int(ctz32(uint32(x))) + +proc swarKey(x: uint): uint {.inline.} = + ## Returns a value where inline char[0] is in the most significant byte, + ## so that integer comparison gives lexicographic string order. + ## LE: slen in bits 0-7; `bswap(x shr 8)` puts char[0] in MSB. + ## BE: slen in bits (sizeof(uint)-1)*8..(sizeof(uint)*8-1) (MSB); `x shl 8` shifts slen out, char[0] lands in MSB. + when system.cpuEndian == littleEndian: + bswap(x shr 8) + else: + x shl 8 + +# ---- accessors ---- +# Memory layout is identical on both endiannesses: byte 0 = slen, bytes 1..AlwaysAvail = inline chars. +# But the integer value of `bytes` differs: on LE slen is in the LSB, on BE in the MSB. + +template ssLenOf(bytes: uint): int = + ## Extract slen from an already-loaded `bytes` word. Zero-cost (register op only). + ## Use when `bytes` is already in a register (e.g. loaded for SWAR comparison). 
+ when system.cpuEndian == littleEndian: + int(bytes and 0xFF'u) + else: + int(bytes shr (8 * (sizeof(uint) - 1))) + +proc cmpShortInline(abytes, bbytes: uint; aslen, bslen: int): int {.inline.} = + let minLen = min(aslen, bslen) + if minLen > 0: + when system.cpuEndian == littleEndian: + let diffMask = (1'u shl (minLen * 8)) - 1'u + let diff = ((abytes xor bbytes) shr 8) and diffMask + if diff != 0: + let byteShift = (ctzImpl(diff) shr 3) * 8 + 8 + let ac = (abytes shr byteShift) and 0xFF'u + let bc = (bbytes shr byteShift) and 0xFF'u + if ac < bc: return -1 + return 1 + else: + let aw = swarKey(abytes) + let bw = swarKey(bbytes) + if aw < bw: return -1 + if aw > bw: return 1 + aslen - bslen + +template ssLen(s: SmallString): int = + ## Load slen via a direct byte access at offset 0 (valid on both LE and BE). + ## A byte load (movzx) lets the C compiler prove that slen is at offset 0, + ## distinct from inline char writes at offsets 1+, enabling register-caching + ## of slen across char-write loops (e.g. nimAddCharV1). + int(cast[ptr byte](unsafeAddr s.bytes)[]) + +template setSSLen(s: var SmallString; v: int) = + # Single byte store — equivalent to old `s.slen = byte(v)`. + # Accessing a uint via byte* is legal in C (char-pointer aliasing exemption). + cast[ptr byte](addr s.bytes)[] = cast[byte](v) + +# Pointer to inline chars (offset +1 from `bytes` field / start of struct). +# Only valid when s is in memory (var/ptr); forces a load from memory. +template inlinePtr(s: SmallString): ptr UncheckedArray[char] = + cast[ptr UncheckedArray[char]](cast[uint](unsafeAddr s.bytes) + 1'u) + +# Same but from a ptr SmallString (avoids unsafeAddr dance). +template inlinePtrOf(p: ptr SmallString): ptr UncheckedArray[char] = + cast[ptr UncheckedArray[char]](cast[uint](p) + 1'u) + +proc resize(old: int): int {.inline.} = + ## Capacity growth factor shared with seqs_v2.nim. 
+ if old <= 0: result = 4 + elif old <= high(int16): result = old * 2 + else: result = old div 2 + old + +# No Nim lifecycle hooks: the compiler calls the compilerRtl procs directly +# for tyString variables (nimDestroyStrV1, nimAsgnStrV2). + +proc nimDestroyStrV1(s: SmallString) {.compilerRtl, inline.} = + if ssLen(s) == HeapSlen: + if atomicSubFetch(s.more.rc, 1) == 0: + dealloc(s.more) + +proc ensureUniqueLong(s: var SmallString; oldLen, newLen: int) = + # Ensure s.more is a unique (rc=1) heap block with capacity >= newLen, preserving existing data. + # s must already be a long string (slen >= StaticSlen) on entry. + # After return, slen == HeapSlen (s is heap-owned). + let isHeap = ssLen(s) == HeapSlen + let cap = if isHeap: s.more.capImpl else: 0 # static literals have capImpl=0 + if isHeap and s.more.rc == 1 and newLen <= cap: + s.more.fullLen = newLen + else: + # Only grow capacity when actually needed; pure COW copies (newLen <= cap) + # preserve the existing capacity to avoid exponential growth via repeated COW. 
+ let newCap = if newLen > cap: max(newLen, resize(cap)) else: cap + let p = cast[ptr LongString](alloc(LongStringDataOffset + newCap + 1)) + p.rc = 1 + p.fullLen = newLen + p.capImpl = newCap + let old = s.more + copyMem(addr p.data[0], addr old.data[0], oldLen + 1) # +1 preserves the '\0' + if isHeap and atomicSubFetch(old.rc, 1) == 0: + dealloc(old) + s.more = p + setSSLen(s, HeapSlen) # mark as heap-owned (also handles static→heap promotion) + +proc len(s: SmallString): int {.inline.} = + result = ssLen(s) + if result > PayloadSize: + result = s.more.fullLen + +template guts(s: SmallString): (int, ptr UncheckedArray[char]) = + let slen = ssLen(s) + if slen > PayloadSize: + (s.more.fullLen, cast[ptr UncheckedArray[char]](addr s.more.data[0])) + else: + (slen, inlinePtr(s)) + +proc nimStrAtV3*(s: var SmallString; i: int): char {.compilerproc, inline.} = + if ssLen(s) <= PayloadSize: + # short/medium: data is in the inline bytes overlay + result = inlinePtr(s)[i] + else: + # long: always use heap data (completeStore keeps more.data canonical) + result = s.more.data[i] + +proc nimStrPutV3*(s: var SmallString; i: int; c: char) {.compilerproc, inline.} = + let slen = ssLen(s) + if slen <= PayloadSize: + # unchecked: when i >= 7 we store into the `more` overlay + inlinePtr(s)[i] = c + # Maintain SWAR zeroing invariant: if i < AlwaysAvail and we wrote a non-null, + # caller is responsible. Writing '\0' here would break content. No action needed. + else: + let l = s.more.fullLen + ensureUniqueLong(s, l, l) # COW if shared; length unchanged + s.more.data[i] = c + if i < AlwaysAvail: + inlinePtr(s)[i] = c + +proc cmpInlineBytes(a, b: ptr UncheckedArray[char]; n: int): int {.inline.} = + for i in 0.. bc: return 1 + +proc cmpStringPtrs(a, b: ptr SmallString): int {.inline.} = + # Compare two SmallStrings by pointer to avoid struct copies in the hot path. 
+ let abytes = a.bytes + let bbytes = b.bytes + let aslen = ssLenOf(abytes) + let bslen = ssLenOf(bbytes) + if aslen <= AlwaysAvail and bslen <= AlwaysAvail: + # SWAR path: both short (≤7 bytes). All data lives in the `bytes` field. + # Zeroed-padding invariant ensures bytes past the null are 0. + # swarKey puts char[0] in the MSB → integer comparison is lexicographic. + let aw = swarKey(abytes) + let bw = swarKey(bbytes) + if aw < bw: return -1 + if aw > bw: return 1 + return aslen - bslen + if aslen <= PayloadSize and bslen <= PayloadSize: + # Both inline/medium: all data lives in the flat struct, no heap access needed. + let minLen = min(aslen, bslen) + let pfxLen = min(minLen, AlwaysAvail) + result = cmpInlineBytes(inlinePtrOf(a), inlinePtrOf(b), pfxLen) + if result != 0: return + if minLen > AlwaysAvail: + let aInl = inlinePtrOf(a) + let bInl = inlinePtrOf(b) + result = cmpInlineBytes( + cast[ptr UncheckedArray[char]](addr aInl[AlwaysAvail]), + cast[ptr UncheckedArray[char]](addr bInl[AlwaysAvail]), + minLen - AlwaysAvail) + if result == 0: result = aslen - bslen + return + # At least one is long. Hot prefix: inlinePtr[0..AlwaysAvail-1] mirrors heap data. + let pfxLen = min(min(aslen, bslen), AlwaysAvail) + result = cmpInlineBytes(inlinePtrOf(a), inlinePtrOf(b), pfxLen) + if result != 0: return + let la = if aslen > PayloadSize: a.more.fullLen else: aslen + let lb = if bslen > PayloadSize: b.more.fullLen else: bslen + let minLen = min(la, lb) + if minLen <= AlwaysAvail: + result = la - lb + return + let ap = if aslen > PayloadSize: cast[ptr UncheckedArray[char]](addr a.more.data[0]) else: + inlinePtrOf(a) + let bp = if bslen > PayloadSize: cast[ptr UncheckedArray[char]](addr b.more.data[0]) else: + inlinePtrOf(b) + result = cmpMem(addr ap[AlwaysAvail], addr bp[AlwaysAvail], minLen - AlwaysAvail) + if result == 0: result = la - lb + +proc cmp(a, b: SmallString): int {.inline.} = + # Load bytes once per string — used for both slen check and SWAR key. 
+ let abytes = a.bytes + let bbytes = b.bytes + let aslen = ssLenOf(abytes) + let bslen = ssLenOf(bbytes) + if aslen <= AlwaysAvail and bslen <= AlwaysAvail: + return cmpShortInline(abytes, bbytes, aslen, bslen) + cmpStringPtrs(unsafeAddr a, unsafeAddr b) + +proc `==`(a, b: SmallString): bool {.inline.} = + let abytes = a.bytes + let bbytes = b.bytes + let aslen = ssLenOf(abytes) + let bslen = ssLenOf(bbytes) + if aslen <= AlwaysAvail and bslen <= AlwaysAvail: + return abytes == bbytes # SWAR: slen equal, data in bytes word + # Compute actual lengths (sentinels 254/255 → more.fullLen) + let la = if aslen > PayloadSize: a.more.fullLen else: aslen + let lb = if bslen > PayloadSize: b.more.fullLen else: bslen + if la != lb: return false + if la == 0: return true + if aslen <= PayloadSize and bslen <= PayloadSize: + # Both medium (slen == la == lb, so byte0 equal): compare prefix word + tail + if abytes != bbytes: return false + let (_, pa) = a.guts + let (_, pb) = b.guts + return cmpMem(addr pa[AlwaysAvail], addr pb[AlwaysAvail], la - AlwaysAvail) == 0 + # At least one long (heap or static): delegate to cmpStringPtrs + cmpStringPtrs(unsafeAddr a, unsafeAddr b) == 0 + +proc continuesWith*(s, sub: SmallString; start: int): bool = + if start < 0: return false + let subslen = ssLen(sub) + if subslen == 0: return true + let sslen = ssLen(s) + # Compare via hot prefix first where possible (no heap dereference). + let pfxLen = min(subslen, max(0, AlwaysAvail - start)) + if pfxLen > 0: + if cmpMem(cast[pointer](cast[uint](unsafeAddr s.bytes) + 1'u + uint(start)), + cast[pointer](cast[uint](unsafeAddr sub.bytes) + 1'u), pfxLen) != 0: + return false + # Fetch actual lengths and compare the remaining tail via heap/guts. 
+ let subLen = if subslen > PayloadSize: sub.more.fullLen else: subslen + let sLen = if sslen > PayloadSize: s.more.fullLen else: sslen + if start + subLen > sLen: return false + if pfxLen == subLen: return true + let (_, sp) = s.guts + let (_, subp) = sub.guts + cmpMem(addr sp[start + pfxLen], addr subp[pfxLen], subLen - pfxLen) == 0 + +proc startsWith*(s, sub: SmallString): bool {.inline.} = continuesWith(s, sub, 0) +proc endsWith*(s, sub: SmallString): bool {.inline.} = continuesWith(s, sub, s.len - sub.len) + + +proc add(s: var SmallString; c: char) = + let slen = ssLen(s) + if slen <= PayloadSize: + let newLen = slen + 1 + if newLen <= PayloadSize: + let inl = inlinePtr(s) + inl[slen] = c + inl[newLen] = '\0' + setSSLen(s, newLen) + else: + # transition from medium (slen == PayloadSize) to long + let cap = newLen * 2 + let p = cast[ptr LongString](alloc(LongStringDataOffset + cap + 1)) + p.rc = 1 + p.fullLen = newLen + p.capImpl = cap + copyMem(addr p.data[0], inlinePtr(s), slen) + p.data[slen] = c + p.data[newLen] = '\0' + s.more = p + setSSLen(s, HeapSlen) + else: + let l = s.more.fullLen # fetch fullLen only in the long path + ensureUniqueLong(s, l, l + 1) + s.more.data[l] = c + s.more.data[l + 1] = '\0' + if l < AlwaysAvail: + inlinePtr(s)[l] = c + +proc add(s: var SmallString; t: SmallString) = + let slen = ssLen(s) + let (tl, tp) = t.guts # fetch t's guts before any mutation (aliasing safety) + if tl == 0: return + if slen <= PayloadSize: + let sl = slen # for short/medium, slen IS the actual length + let newLen = sl + tl + if newLen <= PayloadSize: + let inl = inlinePtr(s) + copyMem(addr inl[sl], tp, tl) + inl[newLen] = '\0' + setSSLen(s, newLen) + else: + # transition to long + let cap = newLen * 2 + let p = cast[ptr LongString](alloc(LongStringDataOffset + cap + 1)) + p.rc = 1 + p.fullLen = newLen + p.capImpl = cap + copyMem(addr p.data[0], inlinePtr(s), sl) + copyMem(addr p.data[sl], tp, tl) + p.data[newLen] = '\0' + if sl < AlwaysAvail: + 
copyMem(addr inlinePtr(s)[sl], tp, min(AlwaysAvail - sl, tl)) + s.more = p + setSSLen(s, HeapSlen) + else: + let sl = s.more.fullLen # fetch fullLen only in the long path + let newLen = sl + tl + # tp was read before ensureUniqueLong: if t.more == s.more, rc decrements but won't hit 0 + ensureUniqueLong(s, sl, newLen) + copyMem(addr s.more.data[sl], tp, tl) + s.more.data[newLen] = '\0' + if sl < AlwaysAvail: + copyMem(addr inlinePtr(s)[sl], tp, min(AlwaysAvail - sl, tl)) + +{.push overflowChecks: off, rangeChecks: off.} + +proc prepareAddLong(s: var SmallString; newLen: int) = + # Reserve capacity for newLen in the long-string block without changing logical length. + let isHeap = ssLen(s) == HeapSlen + let cap = if isHeap: s.more.capImpl else: 0 + if isHeap and s.more.rc == 1 and newLen <= cap: + discard # already unique with sufficient capacity + else: + let oldLen = s.more.fullLen + let newCap = max(newLen, resize(cap)) + let p = cast[ptr LongString](alloc(LongStringDataOffset + newCap + 1)) + p.rc = 1 + p.fullLen = oldLen # logical length unchanged — caller sets it after writing data + p.capImpl = newCap + let old = s.more + copyMem(addr p.data[0], addr old.data[0], oldLen + 1) + if isHeap and atomicSubFetch(old.rc, 1) == 0: + dealloc(old) + s.more = p + setSSLen(s, HeapSlen) + +proc prepareAdd(s: var SmallString; addLen: int) {.compilerRtl.} = + ## Ensure s has room for addLen more characters without changing its length. 
+ let slen = ssLen(s) + let curLen = if slen > PayloadSize: s.more.fullLen else: slen + let newLen = curLen + addLen + if slen <= PayloadSize: + if newLen > PayloadSize: + # transition to long: allocate, copy existing data + let newCap = newLen * 2 + let p = cast[ptr LongString](alloc(LongStringDataOffset + newCap + 1)) + p.rc = 1 + p.fullLen = curLen + p.capImpl = newCap + copyMem(addr p.data[0], inlinePtr(s), curLen + 1) + s.more = p + setSSLen(s, HeapSlen) + # else: short/medium — inline capacity always sufficient (struct is fixed size) + else: + prepareAddLong(s, newLen) + +proc nimAddCharV1(s: var SmallString; c: char) {.compilerRtl, inline.} = + let slen = ssLen(s) + if slen < PayloadSize: + # Hot path: inline/medium with room (slen+1 <= PayloadSize, no heap needed) + let inl = inlinePtr(s) + inl[slen] = c + inl[slen + 1] = '\0' + setSSLen(s, slen + 1) + elif slen > PayloadSize: + # Long string — inline the common case: unique heap block with room + let l = s.more.fullLen + if slen == HeapSlen and s.more.rc == 1 and l < s.more.capImpl: + s.more.data[l] = c + s.more.data[l + 1] = '\0' + s.more.fullLen = l + 1 + if l < AlwaysAvail: + inlinePtr(s)[l] = c + else: + prepareAdd(s, 1) + s.add(c) + else: + # slen == PayloadSize: medium→long transition (rare) + prepareAdd(s, 1) + s.add(c) + +proc toNimStr(str: cstring; len: int): SmallString {.compilerproc.} = + if len <= 0: return + if len <= PayloadSize: + setSSLen(result, len) + let inl = inlinePtr(result) + copyMem(inl, str, len) + inl[len] = '\0' + # Bytes past inl[len] in `bytes` must be zero for SWAR. `result` is zero-initialized, + # and copyMem only fills bytes 0..len-1 of inl; bytes len..6 remain zero. 
+ else: + let p = cast[ptr LongString](alloc(LongStringDataOffset + len + 1)) + p.rc = 1 + p.fullLen = len + p.capImpl = len + copyMem(addr p.data[0], str, len) + p.data[len] = '\0' + copyMem(inlinePtr(result), str, AlwaysAvail) + setSSLen(result, HeapSlen) + result.more = p + +proc cstrToNimstr(str: cstring): SmallString {.compilerRtl.} = + if str == nil: return + toNimStr(str, str.len) + +proc nimToCStringConv(s: var SmallString): cstring {.compilerproc, nonReloadable, inline.} = + ## Returns a null-terminated C string pointer into s's data. + ## Takes by var (pointer) so the inline chars ptr is always valid. + if ssLen(s) > PayloadSize: + cast[cstring](addr s.more.data[0]) + else: + cast[cstring](inlinePtr(s)) + +proc appendString(dest: var SmallString; src: SmallString) {.compilerproc, inline.} = + dest.add(src) + +proc appendChar(dest: var SmallString; c: char) {.compilerproc, inline.} = + dest.add(c) + +proc rawNewString(space: int): SmallString {.compilerproc.} = + ## Returns an empty SmallString with capacity reserved for `space` chars (newStringOfCap). + if space <= 0: return + if space <= PayloadSize: + discard # inline capacity is always available; nothing to pre-allocate + else: + let p = cast[ptr LongString](alloc(LongStringDataOffset + space + 1)) + p.rc = 1 + p.fullLen = 0 + p.capImpl = space + p.data[0] = '\0' + result.more = p + setSSLen(result, HeapSlen) + +proc mnewString(len: int): SmallString {.compilerproc.} = + ## Returns a SmallString of `len` zero characters (newString). + if len <= 0: return + if len <= PayloadSize: + setSSLen(result, len) + # bytes field is zero-initialized (result starts at 0); inline chars are already 0. + # Null terminator at inlinePtr(result)[len] is also 0 — fine for SWAR invariant. 
+ else: + let p = cast[ptr LongString](alloc0(LongStringDataOffset + len + 1)) + p.rc = 1 + p.fullLen = len + p.capImpl = len + # data is zeroed by alloc0; data[len] is '\0' too + result.more = p + setSSLen(result, HeapSlen) + +proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} = + ## Sets the length of s to newLen, zeroing new bytes on growth. + let slen = ssLen(s) + let curLen = if slen > PayloadSize: s.more.fullLen else: slen + if newLen == curLen: return + if newLen <= 0: + if slen > PayloadSize: + if slen == HeapSlen and s.more.rc == 1: + s.more.fullLen = 0 + s.more.data[0] = '\0' + # The block stays attached, so also clear the inline hot-prefix cache: + # cmpStringPtrs compares up to AlwaysAvail inline bytes of long strings, and + # stale chars from the old content would leak into later comparisons. + zeroMem(inlinePtr(s), AlwaysAvail) + else: + # shared or static block: detach and go back to empty inline + nimDestroyStrV1(s) + s.bytes = 0 # slen=0, all inline chars zeroed + else: + s.bytes = 0 # slen=0, all inline chars zeroed (SWAR safe) + return + if slen <= PayloadSize: + if newLen <= PayloadSize: + let inl = inlinePtr(s) + if newLen > curLen: + zeroMem(addr inl[curLen], newLen - curLen) + inl[newLen] = '\0' + setSSLen(s, newLen) + else: + # Shrink: zero out padding bytes for SWAR invariant. + inl[newLen] = '\0' + if newLen < AlwaysAvail: + # Zero bytes newLen+1..AlwaysAvail-1 in `bytes` (chars newLen..AlwaysAvail-2 + # are now padding and must be 0 for SWAR comparison to work correctly). + when system.cpuEndian == littleEndian: + # LE: slen in bits 0-7; keep bits 0..(newLen+1)*8-1, clear the rest above. + let keepBits = (newLen + 1) * 8 + let charMask = ((uint(1) shl keepBits) - 1'u) and not 0xFF'u + s.bytes = (s.bytes and charMask) or uint(newLen) + else: + # BE: slen in the top byte; keep top (newLen+1) bytes, zero the rest below.
+ let discardBits = (AlwaysAvail - newLen) * 8 + let slenBit = 8 * (sizeof(uint) - 1) + let charMask = not ((uint(1) shl discardBits) - 1'u) and not (0xFF'u shl slenBit) + s.bytes = (s.bytes and charMask) or (uint(newLen) shl slenBit) + else: + setSSLen(s, newLen) + else: + # grow into long + let newCap = resize(newLen) + let p = cast[ptr LongString](alloc0(LongStringDataOffset + newCap + 1)) + p.rc = 1 + p.fullLen = newLen + p.capImpl = newCap + copyMem(addr p.data[0], inlinePtr(s), curLen) + # bytes [curLen..newLen] zeroed by alloc0; p.data[newLen] = '\0' by alloc0 + s.more = p + setSSLen(s, HeapSlen) + else: + # currently long + if newLen <= PayloadSize: + # shrink back to inline + let old = s.more + let inl = inlinePtr(s) + copyMem(inl, addr old.data[0], newLen) + inl[newLen] = '\0' + if slen == HeapSlen and atomicSubFetch(old.rc, 1) == 0: + dealloc(old) + # Zero padding bytes in `bytes` for SWAR invariant + if newLen < AlwaysAvail: + when system.cpuEndian == littleEndian: + let keepBits = (newLen + 1) * 8 + let charMask = ((uint(1) shl keepBits) - 1'u) and not 0xFF'u + s.bytes = (s.bytes and charMask) or uint(newLen) + else: + let discardBits = (AlwaysAvail - newLen) * 8 + let slenBit = 8 * (sizeof(uint) - 1) + let charMask = not ((uint(1) shl discardBits) - 1'u) and not (0xFF'u shl slenBit) + s.bytes = (s.bytes and charMask) or (uint(newLen) shl slenBit) + else: + setSSLen(s, newLen) + else: + ensureUniqueLong(s, curLen, newLen) + if newLen > curLen: + zeroMem(addr s.more.data[curLen], newLen - curLen) + s.more.data[newLen] = '\0' + s.more.fullLen = newLen + +proc nimAsgnStrV2(a: var SmallString; b: SmallString) {.compilerRtl, inline.} = + if ssLen(b) <= PayloadSize: + nimDestroyStrV1(a) # free any existing heap block before overwriting + copyMem(addr a, unsafeAddr b, sizeof(SmallString)) + else: + if addr(a) == unsafeAddr(b): return + nimDestroyStrV1(a) + # COW: share the block, bump refcount — no allocation needed (static literals: no bump) + if 
ssLenOf(b.bytes) == HeapSlen: + discard atomicAddFetch(b.more.rc, 1) + copyMem(addr a, unsafeAddr b, sizeof(SmallString)) + +proc nimPrepareStrMutationImpl(s: var SmallString) = + # Called when s holds a static (slen=StaticSlen) LongString block. COW: allocate fresh copy. + let old = s.more + let oldLen = old.fullLen + let p = cast[ptr LongString](alloc(LongStringDataOffset + oldLen + 1)) + p.rc = 1 + p.fullLen = oldLen + p.capImpl = oldLen + copyMem(addr p.data[0], addr old.data[0], oldLen + 1) + s.more = p + setSSLen(s, HeapSlen) # promote from static to heap-owned + +proc nimPrepareStrMutationV2(s: var SmallString) {.compilerRtl, inline.} = + if ssLen(s) == StaticSlen: + nimPrepareStrMutationImpl(s) + +proc prepareMutation*(s: var string) {.inline.} = + {.cast(noSideEffect).}: + nimPrepareStrMutationV2(cast[ptr SmallString](addr s)[]) + +proc nimStrAtMutV3*(s: var SmallString; i: int): var char {.compilerproc, inline.} = + ## Returns a mutable reference to the i-th char. Handles COW for long strings. + ## Used by the codegen when s[i] is passed as a `var char` argument. + if ssLen(s) > PayloadSize: + # COW: unshare before exposing the ref. nimPrepareStrMutationV2 only copies static + # literals; ensureUniqueLong also copies heap blocks shared via nimAsgnStrV2 (rc > 1). + ensureUniqueLong(s, s.more.fullLen, s.more.fullLen) + result = s.more.data[i] + else: + result = inlinePtr(s)[i] + +proc nimAddStrV1(s: var SmallString; src: SmallString) {.compilerRtl, inline.} = + s.add(src) + +func capacity*(self: SmallString): int {.inline.} = + ## Returns the current capacity of the string. + let slen = ssLen(self) + if slen == HeapSlen: + self.more.capImpl + elif slen == StaticSlen: + self.more.fullLen # static: report fullLen as capacity (read-only, no extra room) + else: + PayloadSize + +proc nimStrLen(s: SmallString): int {.compilerproc, inline.} = + ## Returns the length of s. Called by the codegen for `mLen` on strings with -d:nimsso. + s.len + +proc nimStrData(s: var SmallString): ptr UncheckedArray[char] {.compilerproc, inline.} = + ## Returns a pointer to the char data of s.
Called by codegen for subscript and slice with -d:nimsso. + if ssLen(s) > PayloadSize: cast[ptr UncheckedArray[char]](addr s.more.data[0]) + else: inlinePtr(s) + +const + newStringUninitWasDeclared = true + +proc newStringUninitImpl(len: Natural): string {.noSideEffect, inline.} = + ## Returns a new string of length `len` but with uninitialized content. + ## One needs to fill the string character after character + ## with the index operator `s[i]`. + ## + ## This procedure exists only for optimization purposes; + ## the same effect can be achieved with the `&` operator or with `add`. + when nimvm: + result = newString(len) + else: + result = newStringOfCap(len) # rawNewString: alloc (not alloc0) for long strings + {.cast(noSideEffect).}: + if len > 0: + let s = cast[ptr SmallString](addr result) + if len <= PayloadSize: + setSSLen(s[], len) + # Null-terminate; bytes [0..len-1] left uninitialized for caller to fill. + inlinePtr(s[])[len] = '\0' + else: + # rawNewString allocated with alloc (not alloc0), so data[0..len-1] is + # intentionally uninitialized. Caller fills it and calls completeStore. + s.more.fullLen = len + s.more.data[len] = '\0' + +proc completeStore(s: var SmallString) {.compilerproc, inline.} = + ## Must be called after bulk data has been written directly into the string buffer + ## via a raw pointer obtained from `nimStrData`/`nimStrAtMutV3` (e.g. `readBuffer`, + ## `moveMem`, `copyMem`). + ## + ## Syncs the hot prefix cache: copies `more.data[0..AlwaysAvail-1]` into + ## the inline bytes so that `cmp`/`==` can compare long strings + ## without a heap dereference for the first few bytes. 
+ if ssLen(s) > PayloadSize: + copyMem(inlinePtr(s), addr s.more.data[0], AlwaysAvail) + +proc completeStore*(s: var string) {.inline.} = + completeStore(cast[ptr SmallString](addr s)[]) + +proc beginStore*(s: var string; ensuredLen: int; start = 0): ptr UncheckedArray[char] {.inline, noSideEffect, raises: [], tags: [].} = + ## Prepares `s` for a bulk write of `ensuredLen` bytes starting at `start`. + ## The caller must ensure `s.len >= start + ensuredLen` (e.g. via `newString` or `setLen`). + ## Call `endStore(s)` afterwards to sync the inline cache. + {.cast(noSideEffect).}: + let ss = cast[ptr SmallString](addr s) + let slen = ssLen(ss[]) + if slen > PayloadSize: + ensureUniqueLong(ss[], ss[].more.fullLen, ss[].more.fullLen) + result = cast[ptr UncheckedArray[char]](addr ss[].more.data[start]) + else: + result = cast[ptr UncheckedArray[char]](cast[uint](inlinePtr(ss[])) + uint(start)) + +proc endStore*(s: var string) {.inline, noSideEffect, raises: [], tags: [].} = + ## Syncs the inline cache after bulk writes via `beginStore`. No-op for short/medium strings. + {.cast(noSideEffect).}: completeStore(cast[ptr SmallString](addr s)[]) + +proc rawDataImpl(ss: ptr SmallString; start: int): ptr UncheckedArray[char] {.inline, noSideEffect, raises: [].} = + let slen = ssLen(ss[]) + let actualLen = if slen > PayloadSize: ss[].more.fullLen else: slen + if actualLen == 0: nil + elif slen > PayloadSize: cast[ptr UncheckedArray[char]](addr ss[].more.data[start]) + else: cast[ptr UncheckedArray[char]](cast[uint](inlinePtr(ss[])) + uint(start)) + +template readRawData*(s: string; start = 0): ptr UncheckedArray[char] = + ## Returns a pointer to `s[start]` for read-only raw access. + ## Template ensures no copy of `s` is made; ptr is valid while `s` is alive. 
+ rawDataImpl(cast[ptr SmallString](unsafeAddr s), start) + +# These take `string` (tyString) so the codegen uses them directly, bypassing +# strmantle.nim's versions which go through nimStrLen/nimStrAtMutV3 compilerproc calls. +proc cmpStrings(a, b: string): int {.compilerproc, inline.} = + cmpStringPtrs(cast[ptr SmallString](unsafeAddr a), cast[ptr SmallString](unsafeAddr b)) + +proc eqStrings(a, b: string): bool {.compilerproc, inline.} = + cast[ptr SmallString](unsafeAddr a)[] == cast[ptr SmallString](unsafeAddr b)[] + +proc leStrings(a, b: string): bool {.compilerproc, inline.} = + cmpStrings(a, b) <= 0 + +proc ltStrings(a, b: string): bool {.compilerproc, inline.} = + cmpStrings(a, b) < 0 + +proc hashString(s: string): int {.compilerproc.} = + let ss = cast[ptr SmallString](unsafeAddr s)[] + let (L, data) = ss.guts + var h = 0'u + for i in 0.. 0" + if result.rounds <= 0: + quit "--rounds must be > 0" + if result.scenarios.len == 0: + quit "at least one scenario is required" + +proc scenarioName(s: Scenario): string = + ScenarioNames[s.ord] + +proc scenarioList(scenarios: openArray[Scenario]): string = + for i, scenario in scenarios: + if i > 0: + result.add ',' + result.add scenarioName(scenario) + +proc fixed(x: float; digits: range[0..32]): string = + formatFloat(x, ffDecimal, digits) + +proc randomChar(rng: var Rand): char = + Alphabet[rng.rand(Alphabet.high)] + +proc makeRandomString(rng: var Rand; len: int; prefix = ""): string = + result = newString(len) + var i = 0 + while i < len and i < prefix.len: + result[i] = prefix[i] + inc i + while i < len: + result[i] = randomChar(rng) + inc i + +proc pickMixedLength(rng: var Rand): int = + let bucket = rng.rand(0..99) + if bucket < 35: + result = rng.rand(1..AlwaysAvail) + elif bucket < 70: + result = rng.rand(AlwaysAvail + 1 .. InlineMax) + else: + result = rng.rand(InlineMax + 1 .. 
InlineMax + 48) + +proc makeScenarioString(rng: var Rand; kind: Scenario; serial: int): string = + case kind + of scShort: + result = makeRandomString(rng, rng.rand(1..AlwaysAvail)) + of scInline: + result = makeRandomString(rng, rng.rand(AlwaysAvail + 1 .. InlineMax)) + of scBoundary: + let choices = [ + max(1, InlineMax - 2), + max(1, InlineMax - 1), + InlineMax, + InlineMax + 1, + InlineMax + 2 + ] + result = makeRandomString(rng, choices[rng.rand(choices.high)]) + of scLong: + result = makeRandomString(rng, rng.rand(InlineMax + 1 .. InlineMax + 64)) + of scPrefix: + let prefix = SharedPrefixes[rng.rand(SharedPrefixes.high)] + let suffixLen = rng.rand(4..24) + result = makeRandomString(rng, prefix.len + suffixLen, prefix) + of scMixed: + result = makeRandomString(rng, pickMixedLength(rng)) + if kind == scPrefix and result.len > 0: + # Keep the shared-prefix workload adversarial on purpose. + result[^1] = char(ord('0') + (serial mod 10)) + +proc generateDataset(kind: Scenario; count: int; seed: int64): seq[string] = + var rng = initRand(seed + kind.ord.int64 * 10_000_019'i64) + result = newSeq[string](count) + for i in 0.. 0: + result = result xor (uint64(ord(pair.a[0])) shl (i and 7)) + if pair.b.len > 0: + result = result xor (uint64(ord(pair.b[^1])) shl ((i + 3) and 7)) + +proc bench(kind: Scenario; cfg: Config) = + let data = generateDataset(kind, cfg.count, cfg.seed) + let pairs = buildPairs(kind, data) + let avgLen = averageLen(data) + + var warm = 0 + for pair in pairs: + warm += system.cmp(pair.a, pair.b) + + var totalNs = 0.0 + var bestNs = Inf + var worstNs = 0.0 + var combined = uint64(cast[uint](warm)) xor pairChecksum(pairs) + + for round in 0.. 
0" + if result.rounds <= 0: + quit "--rounds must be > 0" + +proc fixed(x: float; digits: range[0..32]): string = + formatFloat(x, ffDecimal, digits) + +proc makeName(rng: var Rand; serial: int): string = + result = FirstNames[rng.rand(FirstNames.high)] & "_" & + LastNames[(serial + rng.rand(LastNames.high)) mod LastNames.len] + +proc makeUrl(name: string; serial: int; score: int): string = + "https://data.example/api/u/" & name & "/" & $serial & + "?score=" & $score & "&src=csv" + +proc csvPath(cfg: Config): string = + getTempDir() / ("nim_csvbench_" & $cfg.rows & "_" & $cfg.seed & ".csv") + +proc writeCsv(path: string; cfg: Config) = + var rng = initRand(cfg.seed) + var f = open(path, fmWrite) + defer: close(f) + + f.writeLine("id,name,age,score,visits,zip,timestamp,url") + for i in 0.. 0: + result = result xor (uint64(ord(field[0])) shl (i and 7)) + result = result xor (uint64(ord(field[^1])) shl ((i + 3) and 7)) + +proc parseAndMaterialize(path: string; rowsExpected: int): tuple[elapsedNs: float, check: uint64] = + var parser: CsvParser + parser.open(path) + defer: parser.close() + parser.readHeaderRow() + + var rows = newSeqOfCap[StoredRow](rowsExpected) + let started = getMonoTime() + while parser.readRow(): + var row: StoredRow + row.id = parser.row[0] + row.name = parser.row[1] + row.age = parser.row[2] + row.score = parser.row[3] + row.visits = parser.row[4] + row.zip = parser.row[5] + row.timestamp = parser.row[6] + row.url = parser.row[7] + result.check = result.check * 0x9E3779B185EBCA87'u64 + checksum(row) + rows.add row + result.elapsedNs = float((getMonoTime() - started).inNanoseconds) + doAssert rows.len == rowsExpected + +proc main() = + let cfg = parseConfig() + let path = csvPath(cfg) + writeCsv(path, cfg) + defer: + if fileExists(path): + removeFile(path) + + let fileSize = getFileSize(path) + var warm = parseAndMaterialize(path, cfg.rows) + discard warm + + var totalNs = 0.0 + var bestNs = Inf + var worstNs = 0.0 + var combined = 
uint64(fileSize) + uint64(cfg.rows) + + for round in 0.. 0" + if result.rounds <= 0: + quit "--rounds must be > 0" + if result.scenarios.len == 0: + quit "at least one scenario is required" + +proc scenarioName(s: Scenario): string = + ScenarioNames[s.ord] + +proc scenarioList(scenarios: openArray[Scenario]): string = + for i, scenario in scenarios: + if i > 0: + result.add ',' + result.add scenarioName(scenario) + +proc fixed(x: float; digits: range[0..32]): string = + formatFloat(x, ffDecimal, digits) + +proc randomChar(rng: var Rand): char = + Alphabet[rng.rand(Alphabet.high)] + +proc makeRandomString(rng: var Rand; len: int; prefix = ""): string = + result = newString(len) + var i = 0 + while i < len and i < prefix.len: + result[i] = prefix[i] + inc i + while i < len: + result[i] = randomChar(rng) + inc i + +proc pickMixedLength(rng: var Rand): int = + let bucket = rng.rand(0..99) + if bucket < 35: + result = rng.rand(1..AlwaysAvail) + elif bucket < 70: + result = rng.rand(AlwaysAvail + 1 .. InlineMax) + else: + result = rng.rand(InlineMax + 1 .. InlineMax + 48) + +proc makeScenarioString(rng: var Rand; kind: Scenario; serial: int): string = + case kind + of scShort: + result = makeRandomString(rng, rng.rand(1..AlwaysAvail)) + of scInline: + result = makeRandomString(rng, rng.rand(AlwaysAvail + 1 .. InlineMax)) + of scBoundary: + let choices = [ + max(1, InlineMax - 2), + max(1, InlineMax - 1), + InlineMax, + InlineMax + 1, + InlineMax + 2 + ] + result = makeRandomString(rng, choices[rng.rand(choices.high)]) + of scLong: + result = makeRandomString(rng, rng.rand(InlineMax + 1 .. 
InlineMax + 64)) + of scPrefix: + let prefix = SharedPrefixes[rng.rand(SharedPrefixes.high)] + let suffixLen = rng.rand(4..24) + result = makeRandomString(rng, prefix.len + suffixLen, prefix) + of scMixed: + result = makeRandomString(rng, pickMixedLength(rng)) + + if result.len > 0: + result[0] = char(ord('a') + (serial mod 26)) + result[^1] = char(ord('0') + (serial mod 10)) + +proc generateDataset(kind: Scenario; count: int; seed: int64): seq[string] = + var rng = initRand(seed + kind.ord.int64 * 10_000_019'i64) + result = newSeq[string](count) + for i in 0.. 0: + result = result xor (uint64(ord(s[0])) shl (i and 7)) + result = result xor (uint64(ord(s[^1])) shl ((i + 3) and 7)) + +proc makeMissQueries(kind: Scenario; count: int; seed: int64): seq[string] = + result = generateDataset(kind, count, seed + 0x6A09E667'i64) + for i in 0..= 0 + doAssert warmMisses == 0 + + var insertTotalNs = 0.0 + var hitTotalNs = 0.0 + var missTotalNs = 0.0 + var insertBestNs = Inf + var hitBestNs = Inf + var missBestNs = Inf + var insertWorstNs = 0.0 + var hitWorstNs = 0.0 + var missWorstNs = 0.0 + var combined = keyCheck + uint64(cfg.count) + + for round in 0..= 0 + doAssert missSum == 0 + + insertTotalNs += insertNs + hitTotalNs += hitNs + missTotalNs += missNs + insertBestNs = min(insertBestNs, insertNs) + hitBestNs = min(hitBestNs, hitNs) + missBestNs = min(missBestNs, missNs) + insertWorstNs = max(insertWorstNs, insertNs) + hitWorstNs = max(hitWorstNs, hitNs) + missWorstNs = max(missWorstNs, missNs) + combined = combined * 0x9E3779B185EBCA87'u64 + + uint64(cast[uint](hitSum xor missSum xor round)) + let insertAvgNs = insertTotalNs / cfg.rounds.float + let hitAvgNs = hitTotalNs / cfg.rounds.float + let missAvgNs = missTotalNs / cfg.rounds.float + echo align(scenarioName(kind), 8), " n=", align($cfg.count, 8), + " avgLen=", align(fixed(avgLen, 1), 6), + " ins=", align(fixed(insertAvgNs / 1e6, 3), 9), " ms", + " hit=", align(fixed(hitAvgNs / 1e6, 3), 9), " ms", + " miss=", 
align(fixed(missAvgNs / 1e6, 3), 9), " ms", + " ns/op=", align(fixed((insertAvgNs + hitAvgNs + missAvgNs) / (3.0 * cfg.count.float), 1), 8), + " check=0x", toHex(combined, 16) + discard insertBestNs + discard hitBestNs + discard missBestNs + discard insertWorstNs + discard hitWorstNs + discard missWorstNs + +proc main() = + let cfg = parseConfig() + echo "inline limit=", InlineMax, " bytes count=", cfg.count, + " rounds=", cfg.rounds, " seed=", cfg.seed + echo "scenarios=", scenarioList(cfg.scenarios) + for scenario in cfg.scenarios: + bench(scenario, cfg) + when not defined(useMalloc): echo "MAXMEM=", formatSize getMaxMem() + +when isMainModule: + main() diff --git a/tests/benchmarks/strings/sortbench.nim b/tests/benchmarks/strings/sortbench.nim new file mode 100644 index 0000000000000..046804868b5cd --- /dev/null +++ b/tests/benchmarks/strings/sortbench.nim @@ -0,0 +1,224 @@ +import std/[algorithm, monotimes, os, random, strutils, times] + +const + AlwaysAvail = 7 + InlineMax = AlwaysAvail + sizeof(pointer) - 1 + Alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-" + SharedPrefixes = [ + "module/submodule/symbol/", + "compiler/semantic/checker/", + "core/runtime/string-table/", + "aaaaaaaaaaaaaa/shared/prefix/", + "zzzzzzzzzzzzzz/shared/prefix/" + ] + ScenarioNames = ["short", "inline", "boundary", "long", "mixed"] # indexed by Scenario.ord; keep in step with the enum below + +type + Scenario = enum + scShort + scInline + scBoundary + scLong + scMixed + + Config = object + count: int + rounds: int + seed: int64 + scenarios: seq[Scenario] + +proc defaultConfig(): Config = + Config( + count: 200_000, + rounds: 5, + seed: 20260307'i64, + scenarios: @[scShort, scInline, scBoundary, scLong, scMixed] + ) + +proc usage() = + echo "String sorting benchmark for experimenting with the SSO runtime."
+ echo "" + echo "Usage:" + echo " nim r -d:danger sortbench.nim [--count=N] [--rounds=N] [--seed=N]" + echo " [--scenarios=list]" + echo "" + echo "Scenarios:" + echo " short, inline, boundary, long, prefix, mixed" + echo "" + echo "Current inline limit on this target: ", InlineMax, " bytes" + +proc parseScenario(name: string): Scenario = + case name.normalize + of "short": + scShort + of "inline": + scInline + of "boundary": + scBoundary + of "long": + scLong + of "mixed": + scMixed + else: + quit "unknown scenario: " & name + +proc parseConfig(): Config = + result = defaultConfig() + for arg in commandLineParams(): + if arg == "--help" or arg == "-h": + usage() + quit 0 + elif arg.startsWith("--count="): + result.count = parseInt(arg["--count=".len .. ^1]) + elif arg.startsWith("--rounds="): + result.rounds = parseInt(arg["--rounds=".len .. ^1]) + elif arg.startsWith("--seed="): + result.seed = parseInt(arg["--seed=".len .. ^1]).int64 + elif arg.startsWith("--scenarios="): + result.scenarios.setLen(0) + for item in arg["--scenarios=".len .. ^1].split(','): + if item.len > 0: + result.scenarios.add parseScenario(item) + else: + quit "unknown argument: " & arg + + if result.count <= 0: + quit "--count must be > 0" + if result.rounds <= 0: + quit "--rounds must be > 0" + if result.scenarios.len == 0: + quit "at least one scenario is required" + +proc scenarioName(s: Scenario): string = + ScenarioNames[s.ord] + +proc randomChar(rng: var Rand): char = + Alphabet[rng.rand(Alphabet.high)] + +proc makeRandomString(rng: var Rand; len: int): string = + result = newString(len) + var i = 0 + while i < len: + result[i] = randomChar(rng) + inc i + +proc pickMixedLength(rng: var Rand): int = + let bucket = rng.rand(0..99) + if bucket < 35: + result = rng.rand(1..AlwaysAvail) + elif bucket < 70: + result = rng.rand(AlwaysAvail + 1 .. InlineMax) + else: + result = rng.rand(InlineMax + 1 .. 
InlineMax + 48) + +proc makeScenarioString(rng: var Rand; kind: Scenario; serial: int): string = + case kind + of scShort: + result = makeRandomString(rng, rng.rand(1..AlwaysAvail)) + of scInline: + result = makeRandomString(rng, rng.rand(1 .. InlineMax)) + of scBoundary: + let choices = [ + max(1, InlineMax - 2), + max(1, InlineMax - 1), + InlineMax, + InlineMax + 1, + InlineMax + 2 + ] + result = makeRandomString(rng, choices[rng.rand(choices.high)]) + of scLong: + result = makeRandomString(rng, rng.rand(InlineMax + 1 .. InlineMax + 64)) + of scMixed: + result = makeRandomString(rng, pickMixedLength(rng)) + + # Inject a little deterministic structure so equal prefixes are common but not identical. + if result.len > 0: + result[0] = char(ord('a') + (serial mod 26)) + result[^1] = char(ord('0') + (serial mod 10)) + +proc generateDataset(kind: Scenario; count: int; seed: int64): seq[string] = + var rng = initRand(seed + kind.ord.int64 * 10_000_019'i64) + result = newSeq[string](count) + for i in 0.. 
0: + return false + result = true + +proc checksum(a: openArray[string]): uint64 = + for i, s in a: + result = result * 0x9E3779B185EBCA87'u64 + uint64(s.len) + if s.len > 0: + result = result xor (uint64(ord(s[0])) shl (i and 7)) + result = result xor (uint64(ord(s[^1])) shl ((i + 3) and 7)) + +proc averageLen(data: openArray[string]): float = + var total = 0 + for s in data: + total += s.len + result = total.float / max(1, data.len).float + +proc scenarioList(scenarios: openArray[Scenario]): string = + for i, scenario in scenarios: + if i > 0: + result.add ',' + result.add scenarioName(scenario) + +proc fixed(x: float; digits: range[0..32]): string = + formatFloat(x, ffDecimal, digits) + +proc bench(kind: Scenario; cfg: Config) = + let data = generateDataset(kind, cfg.count, cfg.seed) + let avgLen = averageLen(data) + + var warmup = cloneStrings(data) + warmup.sort(system.cmp) + doAssert isSorted(warmup) + + var totalNs = 0.0 + var bestNs = Inf + var worstNs = 0.0 + var combinedChecksum = 0'u64 + + for round in 0..