microsoft · Andarist · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · May 18, 2026
diff --git a/internal/checker/checker.go b/internal/checker/checker.go
@@ -28880,15 +28880,15 @@ func (c *Checker) getStringMappingType(symbol *ast.Symbol, t *Type) *Type {
 func applyStringMapping(symbol *ast.Symbol, str string) string {
 	switch intrinsicTypeKinds[symbol.Name] {
 	case IntrinsicTypeKindUppercase:
-		return strings.ToUpper(str)
+		return stringutil.ToUpperJS(str) // NOTE(review): presumably JS toUpperCase semantics, incl. multi-rune mappings — confirm in stringutil
 	case IntrinsicTypeKindLowercase:
-		return strings.ToLower(str)
+		return stringutil.ToLowerJS(str) // NOTE(review): presumably JS toLowerCase semantics, incl. Final_Sigma — confirm in stringutil
 	case IntrinsicTypeKindCapitalize:
 		_, size := utf8.DecodeRuneInString(str)
-		return strings.ToUpper(str[:size]) + str[size:]
+		return stringutil.ToUpperJS(str[:size]) + str[size:] // maps only the first decoded rune; the tail is appended unchanged
 	case IntrinsicTypeKindUncapitalize:
 		_, size := utf8.DecodeRuneInString(str)
-		return strings.ToLower(str[:size]) + str[size:]
+		return stringutil.ToLowerJS(str[:size]) + str[size:] // maps only the first decoded rune; the tail is appended unchanged
 	}
 	return str
 }

diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
@@ -2179,35 +2179,11 @@ func IsIdentifierPartEx(ch rune, languageVariant core.LanguageVariant) bool {
 }
 
 func isUnicodeIdentifierStart(ch rune) bool {
-	return isInUnicodeRanges(ch, unicodeESNextIdentifierStart)
+	return stringutil.IsInRuneRanges(ch, unicodeESNextIdentifierStart) // lookup moved to stringutil; NOTE(review): presumably the same [start, end] pair search as the removed isInUnicodeRanges — confirm
 }
 
 func isUnicodeIdentifierPart(ch rune) bool {
-	return isInUnicodeRanges(ch, unicodeESNextIdentifierPart)
-}
-
-func isInUnicodeRanges(cp rune, ranges []rune) bool {
-	// Bail out quickly if it couldn't possibly be in the map
-	if cp < ranges[0] {
-		return false
-	}
-	// Perform binary search in one of the Unicode range maps
-	lo := 0
-	hi := len(ranges)
-	for lo+1 < hi {
-		mid := lo + (hi-lo)/2
-		// mid has to be even to catch beginning of a range
-		mid -= mid % 2
-		if ranges[mid] <= cp && cp <= ranges[mid+1] {
-			return true
-		}
-		if cp < ranges[mid] {
-			hi = mid
-		} else {
-			lo = mid + 2
-		}
-	}
-	return false
+	return stringutil.IsInRuneRanges(ch, unicodeESNextIdentifierPart) // replaces the local isInUnicodeRanges removed above; NOTE(review): confirm the stringutil helper keeps its behavior
 }
 
 var tokenToText = func() [ast.KindCount]string {

diff --git a/internal/stringutil/_scripts/generate-special-casing.mts b/internal/stringutil/_scripts/generate-special-casing.mts
@@ -0,0 +1,223 @@
+#!/usr/bin/env -S node --experimental-strip-types --no-warnings
+
+import * as fs from "fs";
+import * as path from "path";
+
+const OUTPUT_PATH = path.join(import.meta.dirname, "..", "js_case_generated.go"); // generated Go file is written one directory above this script
+// Keep the generated property tables aligned with the V8/ICU Unicode data we
+// validate against, rather than "latest", so Final_Sigma context does not get
+// ahead of the runtime behavior this package is trying to emulate.
+const UNICODE_VERSION = "15.0.0"; // interpolated into both download URLs below
+const SPECIAL_CASING_URL = `https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/SpecialCasing.txt`;
+const DERIVED_CORE_PROPERTIES_URL = `https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/DerivedCoreProperties.txt`;
+
+const knownContextConditions = new Set([ // context tokens accepted by parseSpecialCasing; only Final_Sigma maps to an emitted condition
+    "Final_Sigma",
+    "After_Soft_Dotted",
+    "More_Above",
+    "After_I",
+    "Not_Before_Dot",
+]);
+
+const knownLocaleConditions = new Set([ // language tags; parseSpecialCasing skips any row that carries one
+    "az",
+    "lt",
+    "tr",
+]);
+
+type SpecialCasingEntry = {
+    codePoint: number; // the single source code point of the row
+    lower: number[]; // lowercase mapping as a code point sequence (empty when the field is blank)
+    upper: number[]; // uppercase mapping as a code point sequence (empty when the field is blank)
+    condition: string; // name of a Go specialCasingCondition constant, emitted verbatim by render
+    comment: string; // trimmed text following the row's "#", emitted as a Go line comment
+};
+
+type Range = {
+    start: number; // first code point of the range
+    end: number; // last code point of the range; equals start for a single-code-point row
+};
+
+// Throws Error(message) when `condition` is falsy; otherwise narrows `condition` for the type checker.
+function assert(condition: unknown, message: string): asserts condition {
+    if (condition) return;
+    throw new Error(message);
+}
+
+// Downloads `url` and resolves to the response body as text.
+// Rejects with an Error naming the URL and HTTP status when the response is not OK.
+async function fetchText(url: string): Promise<string> {
+    const res = await fetch(url);
+    if (res.ok) return await res.text();
+    throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
+}
+
+// Parses a whitespace-separated list of hex code points; a blank field yields [].
+function parseCodePointList(field: string): number[] {
+    const tokens = field.trim().split(/\s+/).filter(token => token.length > 0);
+    return tokens.map(token => parseInt(token, 16));
+}
+
+// Parses "XXXX" or "XXXX..YYYY" (hex) into a Range; a lone code point gives start === end.
+function parseRange(field: string): Range {
+    const bounds = field.split("..");
+    const start = parseInt(bounds[0], 16);
+    return { start, end: bounds[1] ? parseInt(bounds[1], 16) : start };
+}
+
+function goRuneLiteral(codePoint: number): string {
+    return "0x" + codePoint.toString(16).toUpperCase(); // upper-case hex digits, no zero padding
+}
+
+// Renders code points as a double-quoted Go string literal built purely from
+// escapes: \uXXXX for code points up to 0xFFFF, \UXXXXXXXX for anything above.
+// An empty list renders as "".
+function goStringLiteral(codePoints: number[]): string {
+    const escapes = codePoints.map(codePoint => {
+        const hex = codePoint.toString(16).toUpperCase();
+        // Go's \u escape takes exactly 4 hex digits and \U exactly 8.
+        return codePoint <= 0xFFFF
+            ? `\\u${hex.padStart(4, "0")}`
+            : `\\U${hex.padStart(8, "0")}`;
+    });
+    return '"' + escapes.join("") + '"';
+}
+
+function parseSpecialCasing(text: string): { unicodeVersion: string; entries: SpecialCasingEntry[]; } { // parses SpecialCasing.txt into the rows kept for the generated table, plus the version named in its header line
+    const entries: SpecialCasingEntry[] = [];
+    let unicodeVersion = "unknown"; // kept if no "# SpecialCasing-<version>.txt" line is seen
+
+    for (const line of text.split(/\r?\n/)) {
+        const versionMatch = line.match(/^# SpecialCasing-(.+)\.txt$/);
+        if (versionMatch) {
+            unicodeVersion = versionMatch[1];
+            continue;
+        }
+
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) {
+            continue;
+        }
+
+        const [data, comment = ""] = line.split("#", 2); // limit 2: anything after a second "#" is dropped from the comment
+        const parts = data.split(";").map(part => part.trim());
+        assert(parts.length >= 4, `Malformed SpecialCasing row: ${line}`);
+
+        const [codeField, lowerField, _titleField, upperField, conditionField = ""] = parts; // the title-case field is read but not used
+        const code = parseCodePointList(codeField);
+        assert(code.length === 1, `Expected single code point in SpecialCasing row: ${line}`);
+
+        let hasLocaleCondition = false;
+        let condition = "specialCasingConditionNone";
+        let sawContextCondition = false;
+
+        for (const token of conditionField.split(/\s+/).filter(Boolean)) {
+            if (knownContextConditions.has(token)) {
+                sawContextCondition = true;
+                if (token === "Final_Sigma") {
+                    condition = "specialCasingConditionFinalSigma";
+                }
+                continue; // other known context tokens only set sawContextCondition
+            }
+            if (knownLocaleConditions.has(token.toLowerCase())) {
+                hasLocaleCondition = true;
+                continue;
+            }
+            throw new Error(`Unknown SpecialCasing condition token: ${token}`);
+        }
+
+        if (hasLocaleCondition) {
+            continue; // locale-specific rows are left out of the generated table
+        }
+        if (sawContextCondition && condition === "specialCasingConditionNone") { // a context condition other than Final_Sigma has no representation in the output
+            throw new Error(`Unsupported locale-insensitive context-only SpecialCasing row: ${line}`);
+        }
+
+        entries.push({
+            codePoint: code[0],
+            lower: parseCodePointList(lowerField),
+            upper: parseCodePointList(upperField),
+            condition,
+            comment: comment.trim(),
+        });
+    }
+
+    return { unicodeVersion, entries };
+}
+
+// Collects the ranges DerivedCoreProperties.txt assigns to `propertyName`, in file order.
+// Throws when no row matches, so a typo or format change cannot emit an empty Go table.
+function parseDerivedCorePropertyRanges(text: string, propertyName: string): Range[] {
+    const ranges: Range[] = [];
+    for (const line of text.split(/\r?\n/)) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) {
+            continue;
+        }
+        // Drop the trailing "#" comment, then split "<range> ; <property>".
+        const [data] = line.split("#", 1);
+        const parts = data.split(";").map(part => part.trim());
+        if (parts.length < 2 || parts[1] !== propertyName) {
+            continue;
+        }
+        ranges.push(parseRange(parts[0]));
+    }
+    assert(ranges.length > 0, `No ${propertyName} ranges found in DerivedCoreProperties.txt`);
+    return ranges;
+}
+
+function renderRanges(name: string, ranges: Range[]): string { // emits `var <name> = []rune{...}` as a flat list of start, end pairs, newline-terminated
+    const pairs = ranges.map(({ start, end }) => `${goRuneLiteral(start)}, ${goRuneLiteral(end)}`);
+    return "var " + name + " = []rune{" + pairs.join(", ") + "}\n";
+}
+
+function render(unicodeVersion: string, entries: SpecialCasingEntry[], casedRanges: Range[], caseIgnorableRanges: Range[], lowercaseRanges: Range[], uppercaseRanges: Range[]): string { // returns the complete text of the generated Go file; the template below is emitted verbatim
+    const mappings = entries.map(entry => `\t${goRuneLiteral(entry.codePoint)}: {lower: ${goStringLiteral(entry.lower)}, upper: ${goStringLiteral(entry.upper)}, condition: ${entry.condition}}, // ${entry.comment}`).join("\n");
+
+    return `// Code generated by internal/stringutil/_scripts/generate-special-casing.mts. DO NOT EDIT.
+// Based on Unicode SpecialCasing.txt and DerivedCoreProperties.txt (${unicodeVersion}).
+// Includes only the locale-insensitive mappings needed for ECMAScript default casing.
+// Go's unicode package handles simple one-rune mappings, but not these multi-rune
+// mappings or the DerivedCoreProperties data needed for Final_Sigma handling.
+
+package stringutil
+
+type specialCasingCondition uint8
+
+const (
+\tspecialCasingConditionNone specialCasingCondition = iota
+\tspecialCasingConditionFinalSigma
+)
+
+type specialCasingMapping struct {
+\tlower     string
+\tupper     string
+\tcondition specialCasingCondition
+}
+
+var specialCasingMappings = map[rune]specialCasingMapping{
+${mappings}
+}
+
+${renderRanges("unicodeCasedRanges", casedRanges)}
+${renderRanges("unicodeCaseIgnorableRanges", caseIgnorableRanges)}
+${renderRanges("unicodeLowercaseRanges", lowercaseRanges)}
+${renderRanges("unicodeUppercaseRanges", uppercaseRanges)}
+`;
+}
+
+async function main() { // fetches both Unicode data files, parses them, and writes the generated Go source to OUTPUT_PATH
+    const [specialCasingText, derivedCorePropertiesText] = await Promise.all([ // both downloads are started before either is awaited
+        fetchText(SPECIAL_CASING_URL),
+        fetchText(DERIVED_CORE_PROPERTIES_URL),
+    ]);
+
+    const { unicodeVersion, entries } = parseSpecialCasing(specialCasingText); // version comes from the file's own header line, not from UNICODE_VERSION
+    const casedRanges = parseDerivedCorePropertyRanges(derivedCorePropertiesText, "Cased");
+    const caseIgnorableRanges = parseDerivedCorePropertyRanges(derivedCorePropertiesText, "Case_Ignorable");
+    const lowercaseRanges = parseDerivedCorePropertyRanges(derivedCorePropertiesText, "Lowercase");
+    const uppercaseRanges = parseDerivedCorePropertyRanges(derivedCorePropertiesText, "Uppercase");
+    fs.writeFileSync(OUTPUT_PATH, render(unicodeVersion, entries, casedRanges, caseIgnorableRanges, lowercaseRanges, uppercaseRanges)); // overwrites any existing file at OUTPUT_PATH
+}
+
+await main(); // top-level await; no catch is attached here
diff --git a/internal/stringutil/generate.go b/internal/stringutil/generate.go
@@ -0,0 +1,4 @@
+package stringutil
+// The directives below run the generator script (which downloads its inputs from unicode.org) and then format js_case_generated.go with dprint.
+//go:generate node --experimental-strip-types --no-warnings ./_scripts/generate-special-casing.mts
+//go:generate npx dprint fmt js_case_generated.go