@@ -286,6 +286,47 @@ wordCount("Hello world test"); // 3
286286wordCount (" One-word counts as one" ); // 5
287287```
288288
289+ #### ` normalizeWhitespace(str: string, options?: NormalizeWhitespaceOptions): string `
290+
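291+ Normalizes various Unicode whitespace characters to regular spaces. By default it also collapses runs of whitespace into a single space and trims both ends; use the `trim`, `collapse`, and `preserveNewlines` options to change this.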
292+
293+ ``` javascript
294+ normalizeWhitespace("hello   world"); // 'hello world'
295+ normalizeWhitespace (" hello\u00A0 world" ); // 'hello world' (non-breaking space)
296+ normalizeWhitespace (" hello " ); // 'hello'
297+ normalizeWhitespace (" hello\n\n world" ); // 'hello world'
298+
299+ // With options
300+ normalizeWhitespace (" hello " , { trim: false }); // ' hello '
301+ normalizeWhitespace("a  b", { collapse: false }); // 'a  b'
302+ normalizeWhitespace (" hello\n\n world" , { preserveNewlines: true }); // 'hello\n\nworld'
303+
304+ // Handles various Unicode spaces
305+ normalizeWhitespace (" café\u2003 test" ); // 'café test' (em space)
306+ normalizeWhitespace (" hello\u200B world" ); // 'hello world' (zero-width space)
307+ normalizeWhitespace (" 日本\u3000 語" ); // '日本 語' (ideographic space)
308+ ```
309+
310+ #### ` removeNonPrintable(str: string, options?: RemoveNonPrintableOptions): string `
311+
312+ Removes non-printable control characters and formatting characters from strings.
313+
314+ ``` javascript
315+ removeNonPrintable (" hello\x00 world" ); // 'helloworld' (removes NULL character)
316+ removeNonPrintable (" hello\n world" ); // 'helloworld' (removes newline by default)
317+ removeNonPrintable (" hello\u200B world" ); // 'helloworld' (removes zero-width space)
318+ removeNonPrintable (" hello\u202D world" ); // 'helloworld' (removes directional override)
319+
320+ // With options
321+ removeNonPrintable (" hello\n world" , { keepNewlines: true }); // 'hello\nworld'
322+ removeNonPrintable (" hello\t world" , { keepTabs: true }); // 'hello\tworld'
323+ removeNonPrintable (" hello\r\n world" , { keepCarriageReturns: true }); // 'hello\rworld'
324+
325+ // Preserves emoji with zero-width joiners
326+ removeNonPrintable (" 👨👩👧👦" ); // '👨👩👧👦' (family emoji preserved)
327+ removeNonPrintable("text\x1B[32mgreen\x1B[0m"); // 'text[32mgreen[0m' (ESC characters removed; the rest of each ANSI sequence remains)
328+ ```
329+
289330#### ` pad(str: string, length: number, chars?: string): string `
290331
291332Pads a string to a given length by adding characters to both sides (centers the string).
@@ -316,6 +357,32 @@ padEnd("Hi", 6, "=-"); // 'Hi=-=-'
316357padEnd (" 5" , 3 , " 0" ); // '500'
317358```
318359
360+ #### ` graphemes(str: string): string[] `
361+
362+ Splits a string into an array of grapheme clusters, properly handling emojis, combining characters, and complex Unicode.
363+
364+ ``` javascript
365+ graphemes (" hello" ); // ['h', 'e', 'l', 'l', 'o']
366+ graphemes (" 👨👩👧👦🎈" ); // ['👨👩👧👦', '🎈']
367+ graphemes (" café" ); // ['c', 'a', 'f', 'é']
368+ graphemes (" 👍🏽" ); // ['👍🏽'] - emoji with skin tone
369+ graphemes (" 🇺🇸" ); // ['🇺🇸'] - flag emoji
370+ graphemes (" hello👋world" ); // ['h', 'e', 'l', 'l', 'o', '👋', 'w', 'o', 'r', 'l', 'd']
371+ ```
372+
373+ #### ` codePoints(str: string): number[] `
374+
375+ Converts a string into an array of Unicode code points, properly handling surrogate pairs and complex characters.
376+
377+ ``` javascript
378+ codePoints (" hello" ); // [104, 101, 108, 108, 111]
379+ codePoints (" 👍" ); // [128077]
380+ codePoints (" €" ); // [8364]
381+ codePoints (" Hello 👋" ); // [72, 101, 108, 108, 111, 32, 128075]
382+ codePoints (" a👍b" ); // [97, 128077, 98]
383+ codePoints (" 👨👩👧👦" ); // [128104, 8205, 128105, 8205, 128103, 8205, 128102]
384+ ```
385+
319386### String Generation
320387
321388#### ` randomString(length: number, charset?: string): string `
@@ -360,38 +427,87 @@ isUrl("not a url"); // false
360427isUrl (" ftp://files.com/file.zip" ); // true
361428```
362429
430+ #### ` isASCII(str: string): boolean `
431+
432+ Checks if a string contains only ASCII characters (code points 0-127).
433+
434+ ``` javascript
435+ isASCII (" Hello World!" ); // true
436+ isASCII (" café" ); // false
437+ isASCII (" 👍" ); // false
438+ isASCII (" abc123!@#" ); // true
439+ isASCII (" " ); // true
440+ ```
441+
442+ #### ` toASCII(str: string, options?: { placeholder?: string }): string `
443+
444+ Converts a string to an ASCII-safe representation by removing diacritics, converting common Unicode symbols to ASCII equivalents, and removing any remaining non-ASCII characters (or replacing them with `placeholder`, if provided).
445+
446+ ``` javascript
447+ toASCII (" café" ); // 'cafe'
448+ toASCII("Hello “world”"); // 'Hello "world"'
449+ toASCII (" em—dash" ); // 'em-dash'
450+ toASCII (" €100" ); // 'EUR100'
451+ toASCII (" ½ + ¼ = ¾" ); // '1/2 + 1/4 = 3/4'
452+ toASCII (" → ← ↑ ↓" ); // '-> <- ^ v'
453+ toASCII (" α β γ" ); // 'a b g'
454+ toASCII (" Привет" ); // 'Privet'
455+ toASCII (" 你好" ); // '' (removes non-convertible characters)
456+ toASCII (" 你好" , { placeholder: " ?" }); // '??'
457+ toASCII (" Hello 世界" , { placeholder: " ?" }); // 'Hello ??'
458+ toASCII (" © 2024 Müller™" ); // '(c) 2024 Muller(TM)'
459+ ```
460+
461+ Features:
462+
463+ - Removes diacritics/accents (café → cafe)
464+ - Converts smart quotes to regular quotes
465+ - Converts Unicode dashes to hyphens
466+ - Converts mathematical symbols (≈ → ~, ≠ → !=)
467+ - Converts currency symbols (€ → EUR, £ → GBP)
468+ - Converts fractions (½ → 1/2)
469+ - Transliterates common Greek and Cyrillic letters
470+ - Handles emojis and multi-byte Unicode correctly
471+ - Optional placeholder for non-convertible characters
472+
363473## Bundle Size
364474
365475Each utility is optimized to be as small as possible:
366476
367- | Function | Size (minified) |
368- | ------------ | --------------- |
369- | slugify | ~ 200 bytes |
370- | camelCase | ~ 250 bytes |
371- | snakeCase | ~ 220 bytes |
372- | kebabCase | ~ 200 bytes |
373- | pascalCase | ~ 180 bytes |
374- | constantCase | ~ 230 bytes |
375- | dotCase | ~ 210 bytes |
376- | pathCase | ~ 210 bytes |
377- | sentenceCase | ~ 280 bytes |
378- | titleCase | ~ 320 bytes |
379- | capitalize | ~ 100 bytes |
380- | truncate | ~ 150 bytes |
381- | stripHtml | ~ 120 bytes |
382- | escapeHtml | ~ 180 bytes |
383- | randomString | ~ 200 bytes |
384- | hashString | ~ 150 bytes |
385- | reverse | ~ 80 bytes |
386- | deburr | ~ 200 bytes |
387- | isEmail | ~ 180 bytes |
388- | isUrl | ~ 200 bytes |
389- | wordCount | ~ 100 bytes |
390- | template | ~ 350 bytes |
391- | templateSafe | ~ 400 bytes |
392- | pad | ~ 180 bytes |
393- | padStart | ~ 150 bytes |
394- | padEnd | ~ 150 bytes |
477+ | Function | Size (minified) |
478+ | ------------------- | --------------- |
479+ | slugify | ~ 200 bytes |
480+ | camelCase | ~ 250 bytes |
481+ | snakeCase | ~ 220 bytes |
482+ | kebabCase | ~ 200 bytes |
483+ | pascalCase | ~ 180 bytes |
484+ | constantCase | ~ 230 bytes |
485+ | dotCase | ~ 210 bytes |
486+ | pathCase | ~ 210 bytes |
487+ | sentenceCase | ~ 280 bytes |
488+ | titleCase | ~ 320 bytes |
489+ | capitalize | ~ 100 bytes |
490+ | truncate | ~ 150 bytes |
491+ | stripHtml | ~ 120 bytes |
492+ | escapeHtml | ~ 180 bytes |
493+ | randomString | ~ 200 bytes |
494+ | hashString | ~ 150 bytes |
495+ | reverse | ~ 80 bytes |
496+ | deburr | ~ 200 bytes |
497+ | isEmail | ~ 180 bytes |
498+ | isUrl | ~ 200 bytes |
499+ | isASCII | ~ 100 bytes |
500+ | toASCII | ~ 450 bytes |
501+ | wordCount | ~ 100 bytes |
502+ | normalizeWhitespace | ~ 280 bytes |
503+ | removeNonPrintable | ~ 200 bytes |
504+ | template | ~ 350 bytes |
505+ | templateSafe | ~ 400 bytes |
506+ | pad | ~ 180 bytes |
507+ | padStart | ~ 150 bytes |
508+ | padEnd | ~ 150 bytes |
509+ | graphemes | ~ 250 bytes |
510+ | codePoints | ~ 120 bytes |
395511
396512Total package size: **< 5KB** minified + gzipped
397513
0 commit comments