Zheruel
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 18 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 144 additions & 28 deletions b/‎README.md‎
Lines changed: 144 additions & 28 deletions
diff --git a/‎jsr.json‎
Lines changed: 1 addition & 1 deletion b/‎jsr.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎package.json‎
Lines changed: 1 addition & 1 deletion b/‎package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/codePoints.ts‎
Lines changed: 20 additions & 0 deletions b/‎src/codePoints.ts‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/graphemes.ts‎
Lines changed: 25 additions & 0 deletions b/‎src/graphemes.ts‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎src/index.ts‎
Lines changed: 12 additions & 0 deletions b/‎src/index.ts‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎src/isASCII.ts‎
Lines changed: 19 additions & 0 deletions b/‎src/isASCII.ts‎
Lines changed: 19 additions & 0 deletions
@@ -37,3 +37,6 @@ tmp/
 temp/
 # TypeDoc generated documentation
 docs/
+
+# Claude
+CLAUDE.MD
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.0] - 2025-09-03
+
+### Added
+- `codePoints` - Convert strings into arrays of Unicode code points
+- `graphemes` - Split strings into grapheme clusters (emoji-aware)
+- `isASCII` - Check if string contains only ASCII characters
+- `toASCII` - Convert strings to ASCII-safe representation with transliteration
+- `normalizeWhitespace` - Normalize various Unicode whitespace characters
+- `removeNonPrintable` - Remove control and formatting characters
+
+### Enhanced
+- Comprehensive Unicode support across all new utilities
+- Support for complex emoji sequences and combining characters
+- Configurable options for whitespace normalization and character removal
+- Greek and Cyrillic transliteration in toASCII
+- Smart symbol conversion (quotes, dashes, fractions, currency)
+
 ## [0.2.0] - 2025-09-02
 
 ### Added
@@ -55,5 +72,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 100% test coverage for utility functions
 - Modern build tooling with tsup and Vitest
 
+[0.3.0]: https://github.com/Zheruel/nano-string-utils/releases/tag/v0.3.0
 [0.2.0]: https://github.com/Zheruel/nano-string-utils/releases/tag/v0.2.0
 [0.1.0]: https://github.com/Zheruel/nano-string-utils/releases/tag/v0.1.0
@@ -286,6 +286,47 @@ wordCount("Hello world test"); // 3
 wordCount("One-word counts as one"); // 5
 ```
 
+#### `normalizeWhitespace(str: string, options?: NormalizeWhitespaceOptions): string`
+
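+Normalizes various Unicode whitespace characters to regular spaces. By default it also collapses consecutive whitespace into a single space and trims both ends; use the options to disable either step or to preserve newlines.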
+
+```javascript
+normalizeWhitespace("hello   world"); // 'hello world'
+normalizeWhitespace("hello\u00A0world"); // 'hello world' (non-breaking space)
+normalizeWhitespace("  hello  "); // 'hello'
+normalizeWhitespace("hello\n\nworld"); // 'hello world'
+
+// With options
+normalizeWhitespace("  hello  ", { trim: false }); // ' hello '
+normalizeWhitespace("a    b", { collapse: false }); // 'a    b'
+normalizeWhitespace("hello\n\nworld", { preserveNewlines: true }); // 'hello\n\nworld'
+
+// Handles various Unicode spaces
+normalizeWhitespace("café\u2003test"); // 'café test' (em space)
+normalizeWhitespace("hello\u200Bworld"); // 'hello world' (zero-width space)
+normalizeWhitespace("日本\u3000語"); // '日本 語' (ideographic space)
+```
+
+#### `removeNonPrintable(str: string, options?: RemoveNonPrintableOptions): string`
+
+Removes non-printable control characters and formatting characters from strings.
+
+```javascript
+removeNonPrintable("hello\x00world"); // 'helloworld' (removes NULL character)
+removeNonPrintable("hello\nworld"); // 'helloworld' (removes newline by default)
+removeNonPrintable("hello\u200Bworld"); // 'helloworld' (removes zero-width space)
+removeNonPrintable("hello\u202Dworld"); // 'helloworld' (removes directional override)
+
+// With options
+removeNonPrintable("hello\nworld", { keepNewlines: true }); // 'hello\nworld'
+removeNonPrintable("hello\tworld", { keepTabs: true }); // 'hello\tworld'
+removeNonPrintable("hello\r\nworld", { keepCarriageReturns: true }); // 'hello\rworld'
+
+// Preserves emoji with zero-width joiners
+removeNonPrintable("👨‍👩‍👧‍👦"); // '👨‍👩‍👧‍👦' (family emoji preserved)
+removeNonPrintable("text\x1B[32mgreen\x1B[0m"); // 'text[32mgreen[0m' (ESC control characters removed; the printable remainder of each ANSI sequence is kept)
+```
+
 #### `pad(str: string, length: number, chars?: string): string`
 
 Pads a string to a given length by adding characters to both sides (centers the string).
@@ -316,6 +357,32 @@ padEnd("Hi", 6, "=-"); // 'Hi=-=-'
 padEnd("5", 3, "0"); // '500'
 ```
 
+#### `graphemes(str: string): string[]`
+
+Splits a string into an array of grapheme clusters, properly handling emojis, combining characters, and complex Unicode.
+
+```javascript
+graphemes("hello"); // ['h', 'e', 'l', 'l', 'o']
+graphemes("👨‍👩‍👧‍👦🎈"); // ['👨‍👩‍👧‍👦', '🎈']
+graphemes("café"); // ['c', 'a', 'f', 'é']
+graphemes("👍🏽"); // ['👍🏽'] - emoji with skin tone
+graphemes("🇺🇸"); // ['🇺🇸'] - flag emoji
+graphemes("hello👋world"); // ['h', 'e', 'l', 'l', 'o', '👋', 'w', 'o', 'r', 'l', 'd']
+```
+
+#### `codePoints(str: string): number[]`
+
+Converts a string into an array of Unicode code points, properly handling surrogate pairs and complex characters.
+
+```javascript
+codePoints("hello"); // [104, 101, 108, 108, 111]
+codePoints("👍"); // [128077]
+codePoints("€"); // [8364]
+codePoints("Hello 👋"); // [72, 101, 108, 108, 111, 32, 128075]
+codePoints("a👍b"); // [97, 128077, 98]
+codePoints("👨‍👩‍👧‍👦"); // [128104, 8205, 128105, 8205, 128103, 8205, 128102]
+```
+
 ### String Generation
 
 #### `randomString(length: number, charset?: string): string`
@@ -360,38 +427,87 @@ isUrl("not a url"); // false
 isUrl("ftp://files.com/file.zip"); // true
 ```
 
+#### `isASCII(str: string): boolean`
+
+Checks if a string contains only ASCII characters (code points 0-127).
+
+```javascript
+isASCII("Hello World!"); // true
+isASCII("café"); // false
+isASCII("👍"); // false
+isASCII("abc123!@#"); // true
+isASCII(""); // true
+```
+
+#### `toASCII(str: string, options?: { placeholder?: string }): string`
+
+Converts a string to ASCII-safe representation by removing diacritics, converting common Unicode symbols, and optionally replacing non-ASCII characters.
+
+```javascript
+toASCII("café"); // 'cafe'
+toASCII("Hello “world”"); // 'Hello "world"'
+toASCII("em—dash"); // 'em-dash'
+toASCII("€100"); // 'EUR100'
+toASCII("½ + ¼ = ¾"); // '1/2 + 1/4 = 3/4'
+toASCII("→ ← ↑ ↓"); // '-> <- ^ v'
+toASCII("α β γ"); // 'a b g'
+toASCII("Привет"); // 'Privet'
+toASCII("你好"); // '' (removes non-convertible characters)
+toASCII("你好", { placeholder: "?" }); // '??'
+toASCII("Hello 世界", { placeholder: "?" }); // 'Hello ??'
+toASCII("© 2024 Müller™"); // '(c) 2024 Muller(TM)'
+```
+
+Features:
+
+- Removes diacritics/accents (café → cafe)
+- Converts smart quotes to regular quotes
+- Converts Unicode dashes to hyphens
+- Converts mathematical symbols (≈ → ~, ≠ → !=)
+- Converts currency symbols (€ → EUR, £ → GBP)
+- Converts fractions (½ → 1/2)
+- Transliterates common Greek and Cyrillic letters
+- Handles emojis and multi-byte Unicode correctly
+- Optional placeholder for non-convertible characters
+
 ## Bundle Size
 
 Each utility is optimized to be as small as possible:
 
-| Function     | Size (minified) |
-| ------------ | --------------- |
-| slugify      | ~200 bytes      |
-| camelCase    | ~250 bytes      |
-| snakeCase    | ~220 bytes      |
-| kebabCase    | ~200 bytes      |
-| pascalCase   | ~180 bytes      |
-| constantCase | ~230 bytes      |
-| dotCase      | ~210 bytes      |
-| pathCase     | ~210 bytes      |
-| sentenceCase | ~280 bytes      |
-| titleCase    | ~320 bytes      |
-| capitalize   | ~100 bytes      |
-| truncate     | ~150 bytes      |
-| stripHtml    | ~120 bytes      |
-| escapeHtml   | ~180 bytes      |
-| randomString | ~200 bytes      |
-| hashString   | ~150 bytes      |
-| reverse      | ~80 bytes       |
-| deburr       | ~200 bytes      |
-| isEmail      | ~180 bytes      |
-| isUrl        | ~200 bytes      |
-| wordCount    | ~100 bytes      |
-| template     | ~350 bytes      |
-| templateSafe | ~400 bytes      |
-| pad          | ~180 bytes      |
-| padStart     | ~150 bytes      |
-| padEnd       | ~150 bytes      |
+| Function            | Size (minified) |
+| ------------------- | --------------- |
+| slugify             | ~200 bytes      |
+| camelCase           | ~250 bytes      |
+| snakeCase           | ~220 bytes      |
+| kebabCase           | ~200 bytes      |
+| pascalCase          | ~180 bytes      |
+| constantCase        | ~230 bytes      |
+| dotCase             | ~210 bytes      |
+| pathCase            | ~210 bytes      |
+| sentenceCase        | ~280 bytes      |
+| titleCase           | ~320 bytes      |
+| capitalize          | ~100 bytes      |
+| truncate            | ~150 bytes      |
+| stripHtml           | ~120 bytes      |
+| escapeHtml          | ~180 bytes      |
+| randomString        | ~200 bytes      |
+| hashString          | ~150 bytes      |
+| reverse             | ~80 bytes       |
+| deburr              | ~200 bytes      |
+| isEmail             | ~180 bytes      |
+| isUrl               | ~200 bytes      |
+| isASCII             | ~100 bytes      |
+| toASCII             | ~450 bytes      |
+| wordCount           | ~100 bytes      |
+| normalizeWhitespace | ~280 bytes      |
+| removeNonPrintable  | ~200 bytes      |
+| template            | ~350 bytes      |
+| templateSafe        | ~400 bytes      |
+| pad                 | ~180 bytes      |
+| padStart            | ~150 bytes      |
+| padEnd              | ~150 bytes      |
+| graphemes           | ~250 bytes      |
+| codePoints          | ~120 bytes      |
 
 Total package size: **< 5KB** minified + gzipped
 
 
@@ -1,6 +1,6 @@
 {
   "name": "@zheruel/nano-string-utils",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "exports": "./src/index.ts",
   "publish": {
     "include": [
 
@@ -1,6 +1,6 @@
 {
   "name": "nano-string-utils",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "Ultra-lightweight string utilities with zero dependencies",
   "type": "module",
   "main": "./dist/index.cjs",
 
@@ -0,0 +1,20 @@
+/**
+ * Converts a string into an array of Unicode code points.
+ *
+ * The string is walked with `for...of`, which iterates by code point rather
+ * than by UTF-16 code unit, so a surrogate pair (for example an emoji)
+ * produces one number instead of two. Code points that merely combine into a
+ * single visible character are still reported individually.
+ *
+ * The `undefined` check inside the loop only narrows the
+ * `number | undefined` return type of `codePointAt`; each chunk produced by
+ * the string iterator is non-empty.
+ *
+ * @param str - The input string
+ * @returns An array of Unicode code point numbers in string order; an empty
+ * array when `str` is empty
+ * @example
+ * codePoints('hello') // [104, 101, 108, 108, 111]
+ * codePoints('👍') // [128077]
+ * codePoints('€') // [8364]
+ * codePoints('a👍b') // [97, 128077, 98]
+ */
+export function codePoints(str: string): number[] {
+  const points: number[] = [];
+  for (const char of str) {
+    const point = char.codePointAt(0);
+    if (point !== undefined) {
+      points.push(point);
+    }
+  }
+  return points;
+}
@@ -0,0 +1,25 @@
+/**
+ * Splits a string into an array of grapheme clusters (user-perceived
+ * characters).
+ *
+ * When `Intl.Segmenter` is available it is used with grapheme granularity and
+ * no explicit locale, so emoji sequences and combining characters stay
+ * together. A new segmenter is constructed on every call.
+ *
+ * When `Intl.Segmenter` is not available the function falls back to
+ * `Array.from(str)`, which splits by code point: surrogate pairs stay intact,
+ * but clusters made of several code points are broken apart.
+ *
+ * @param str - The string to split
+ * @returns Array of grapheme clusters; an empty array when `str` is empty
+ * @example
+ * graphemes('👨‍👩‍👧‍👦🎈') // ['👨‍👩‍👧‍👦', '🎈']
+ * graphemes('café') // ['c', 'a', 'f', 'é']
+ * graphemes('hello') // ['h', 'e', 'l', 'l', 'o']
+ */
+export function graphemes(str: string): string[] {
+  if (!str) return [];
+
+  // Preferred path: Intl.Segmenter performs proper grapheme cluster splitting
+  if (typeof Intl !== "undefined" && "Segmenter" in Intl) {
+    const segmenter = new Intl.Segmenter(undefined, {
+      granularity: "grapheme",
+    });
+    return Array.from(segmenter.segment(str), (segment) => segment.segment);
+  }
+
+  // Fallback for environments without Intl.Segmenter: split by code point.
+  // Multi-code-point emojis are split apart, but basic text works.
+  return Array.from(str);
+}
@@ -24,3 +24,15 @@ export { titleCase, type TitleCaseOptions } from "./titleCase.js";
 export { constantCase } from "./constantCase.js";
 export { dotCase } from "./dotCase.js";
 export { pathCase } from "./pathCase.js";
+export { graphemes } from "./graphemes.js";
+export { codePoints } from "./codePoints.js";
+export { isASCII } from "./isASCII.js";
+export {
+  normalizeWhitespace,
+  type NormalizeWhitespaceOptions,
+} from "./normalizeWhitespace.js";
+export {
+  removeNonPrintable,
+  type RemoveNonPrintableOptions,
+} from "./removeNonPrintable.js";
+export { toASCII, type ToASCIIOptions } from "./toASCII.js";
@@ -0,0 +1,19 @@
+/**
+ * Checks if a string contains only ASCII characters (code points 0-127).
+ *
+ * The check is done per UTF-16 code unit with `charCodeAt` and stops at the
+ * first unit above 127. Both halves of a surrogate pair are above 127, so
+ * characters outside the Basic Multilingual Plane are rejected as well.
+ * Control characters in the 0-31 range and DEL (127) count as ASCII.
+ *
+ * @param str - The input string to check
+ * @returns True if the string contains only ASCII characters, false
+ * otherwise; true for the empty string, since there is nothing to reject
+ * @example
+ * isASCII('Hello World!') // true
+ * isASCII('café') // false
+ * isASCII('👍') // false
+ * isASCII('abc123!@#') // true
+ * isASCII('') // true
+ */
+export function isASCII(str: string): boolean {
+  for (let i = 0; i < str.length; i++) {
+    if (str.charCodeAt(i) > 127) {
+      return false;
+    }
+  }
+  return true;
+}
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@zheruel/nano-string-utils",`
`3`		`- "version": "0.2.0",`
	`3`	`+ "version": "0.3.0",`
`4`	`4`	`"exports": "./src/index.ts",`
`5`	`5`	`"publish": {`
`6`	`6`	`"include": [`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "nano-string-utils",`
`3`		`- "version": "0.2.0",`
	`3`	`+ "version": "0.3.0",`
`4`	`4`	`"description": "Ultra-lightweight string utilities with zero dependencies",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "./dist/index.cjs",`