nhsuk · moshaid · Jan 13, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 16, 2026
diff --git a/packages/nhsuk-frontend/src/nhsuk/common/grapheme-count.jsdom.test.mjs b/packages/nhsuk-frontend/src/nhsuk/common/grapheme-count.jsdom.test.mjs
@@ -0,0 +1,82 @@
+import { graphemeCount } from './grapheme-count.mjs'
+
+describe('graphemeCount', () => {
+  // Zero-width joiner, written as an escape so the invisible character cannot
+  // be silently dropped when this file is edited or reformatted
+  const ZWJ = '\u200D'
+
+  // Emoji sequences built from their visible parts plus explicit joiners
+  const family = ['👨', '👩', '👧', '👦'].join(ZWJ)
+  const technologist = ['👨', '💻'].join(ZWJ)
+  const healthWorker = ['👨', '\u2695\uFE0F'].join(ZWJ)
+
+  it('counts simple strings', () => {
+    expect(graphemeCount('Hello')).toBe(5)
+    expect(graphemeCount('World')).toBe(5)
+    expect(graphemeCount('NHS')).toBe(3)
+  })
+
+  it('handles empty strings', () => {
+    expect(graphemeCount('')).toBe(0)
+  })
+
+  it('counts whitespace', () => {
+    expect(graphemeCount(' ')).toBe(1)
+    expect(graphemeCount('Hello World')).toBe(11)
+    expect(graphemeCount('\n\t')).toBe(2)
+  })
+
+  it('counts accented characters', () => {
+    expect(graphemeCount('café')).toBe(4)
+    expect(graphemeCount('naïve')).toBe(5)
+    expect(graphemeCount('résumé')).toBe(6)
+  })
+
+  it('counts decomposed accented characters', () => {
+    // Base letter followed by a combining mark: two code points, but one
+    // user-perceived character
+    expect(graphemeCount('cafe\u0301')).toBe(4)
+    expect(graphemeCount('nai\u0308ve')).toBe(5)
+    expect(graphemeCount('re\u0301sume\u0301')).toBe(6)
+  })
+
+  it('counts non-Latin scripts', () => {
+    expect(graphemeCount('你好')).toBe(2)
+    expect(graphemeCount('こんにちは')).toBe(5)
+    expect(graphemeCount('안녕하세요')).toBe(5)
+    expect(graphemeCount('Привет')).toBe(6)
+  })
+
+  it('counts emoji', () => {
+    expect(graphemeCount('👋')).toBe(1)
+    expect(graphemeCount('😀')).toBe(1)
+    expect(graphemeCount('🇬🇧')).toBe(1)
+  })
+
+  it('counts emoji with skin tones', () => {
+    expect(graphemeCount('👋🏼')).toBe(1)
+    expect(graphemeCount('👍🏿')).toBe(1)
+  })
+
+  it('counts emoji sequences', () => {
+    expect(graphemeCount(family)).toBe(1)
+    expect(graphemeCount(technologist)).toBe(1)
+  })
+
+  it('counts multiple emoji', () => {
+    expect(graphemeCount('👋 Hello 👋🏼')).toBe(9)
+    expect(graphemeCount('😀😃😄')).toBe(3)
+  })
+
+  it('handles mixed content', () => {
+    expect(graphemeCount('Hello 👋 World')).toBe(13)
+    expect(graphemeCount('café 👋🏼')).toBe(6)
+  })
+
+  it('handles surrogate pairs', () => {
+    const note = '\u{1F3B5}'
+    expect(graphemeCount(note)).toBe(1)
+  })
+
+  it('throws for invalid input', () => {
+    expect(() => graphemeCount(null)).toThrow(TypeError)
+    expect(() => graphemeCount(undefined)).toThrow(TypeError)
+    // @ts-expect-error invalid type
+    expect(() => graphemeCount(123)).toThrow(TypeError)
+    // @ts-expect-error invalid type
+    expect(() => graphemeCount({})).toThrow(TypeError)
+    // @ts-expect-error invalid type
+    expect(() => graphemeCount([])).toThrow(TypeError)
+  })
+
+  it('works with real examples', () => {
+    expect(graphemeCount(`NHS ${healthWorker}`)).toBe(5)
+    expect(graphemeCount('Call 111 🏥')).toBe(10)
+    expect(graphemeCount('Please describe your symptoms')).toBe(29)
+    expect(graphemeCount('Feeling unwell 😷')).toBe(16)
+    expect(graphemeCount('José García')).toBe(11)
+  })
+})
diff --git a/packages/nhsuk-frontend/src/nhsuk/common/grapheme-count.mjs b/packages/nhsuk-frontend/src/nhsuk/common/grapheme-count.mjs
@@ -0,0 +1,45 @@
+/**
+ * Count the Unicode code points in a string
+ *
+ * A character stored as a UTF-16 surrogate pair (such as most emoji) counts
+ * once, and an unpaired surrogate also counts once. Combining marks, emoji
+ * modifiers and zero-width joiners are code points in their own right, so
+ * each of them adds to the total
+ *
+ * @param {string} text - The text to count the code points of
+ * @returns {number} The number of code points in the text
+ * @throws {TypeError} When `text` is not a string
+ */
+export function codePointCount(text) {
+  if (typeof text !== 'string') {
+    throw new TypeError('codePointCount expects a string argument')
+  }
+
+  // String iteration walks code points rather than UTF-16 code units, so the
+  // spread yields exactly one element per code point
+  return [...text].length
+}
+
+/**
+ * Grapheme segmenter shared between calls, created on first use
+ *
+ * Stays `undefined` until a segmenter has been constructed successfully
+ *
+ * @type {Intl.Segmenter | undefined}
+ */
+let graphemeSegmenter
+
+/**
+ * Get the shared grapheme segmenter, creating it if needed
+ *
+ * @returns {Intl.Segmenter | undefined} The segmenter, or `undefined` when
+ *   `Intl.Segmenter` is missing or could not be constructed
+ */
+function getGraphemeSegmenter() {
+  if (
+    !graphemeSegmenter &&
+    typeof Intl !== 'undefined' &&
+    'Segmenter' in Intl
+  ) {
+    try {
+      graphemeSegmenter = new Intl.Segmenter('en', { granularity: 'grapheme' })
+    } catch {
+      // Deliberate best effort: if the segmenter cannot be constructed the
+      // caller falls back to code point counting instead of failing
+    }
+  }
+
+  return graphemeSegmenter
+}
+
+/**
+ * Count the grapheme clusters (user-perceived characters) in a string
+ *
+ * Uses `Intl.Segmenter` when it is available. Otherwise the result is the
+ * number of code points, which over-counts combining marks and multi-part
+ * emoji
+ *
+ * @param {string} text - The text to count the grapheme clusters of
+ * @returns {number} The number of grapheme clusters (or code points, when
+ *   `Intl.Segmenter` is unavailable) in the text
+ * @throws {TypeError} When `text` is not a string
+ */
+export function graphemeCount(text) {
+  if (typeof text !== 'string') {
+    throw new TypeError('graphemeCount expects a string argument')
+  }
+
+  const segmenter = getGraphemeSegmenter()
+
+  if (!segmenter) {
+    return codePointCount(text)
+  }
+
+  return [...segmenter.segment(text)].length
+}
diff --git a/packages/nhsuk-frontend/src/nhsuk/common/index.mjs b/packages/nhsuk-frontend/src/nhsuk/common/index.mjs
@@ -163,6 +163,7 @@ export function formatErrorMessage(Component, message) {
 }
 
 export * from './closest-attribute-value.mjs'
+export * from './grapheme-count.mjs'
 export * from './nhsuk-frontend-version.mjs'
 
 /**

diff --git a/packages/nhsuk-frontend/src/nhsuk/components/character-count/README.md b/packages/nhsuk-frontend/src/nhsuk/components/character-count/README.md
@@ -7,3 +7,84 @@ See the [main README quick start guide](https://github.com/nhsuk/nhsuk-frontend#
 ## Guidance and examples
 
 To learn more about the character count component and when to use it, visit the [design system in the NHS digital service manual](https://service-manual.nhs.uk/design-system/components/character-count) for guidance, examples and options.
+
+## How characters are counted
+
+By default, the character count component uses **code point counting**, which matches Python's `len()` function for Unicode strings. This ensures consistency between client-side (JavaScript) and server-side (Python) validation in `nhsuk-frontend-jinja`, preventing mismatched error messages.
+
+You can optionally enable **grapheme cluster counting** (user-perceived characters) by setting `useGraphemeCounting: true` in the component configuration. This provides more accurate counting for:
+
+- **Emoji and emoji sequences**: Emoji like 👋, 👋🏼 (with skin tone), and 👨‍👩‍👧‍👦 (family emoji) are each counted as a single character
+- **Characters with combining marks**: Accented characters like é, ñ, and ü are counted correctly regardless of whether they're stored as a single code point or as a base character plus combining mark
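+- **Complex scripts**: In scripts that combine several code points into one visible character (for example Devanagari or Thai), each visible character is counted once. Text in scripts such as Chinese, Japanese, Korean and Arabic is typically counted the same way in both modes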
+
+**Important**: Only enable grapheme counting if your server-side validation also uses grapheme counting. Otherwise, you may see different counts between client and server validation messages.
+
+### Examples
+
+**Default behavior (code point counting - matches Python `len()`):**
+
+```javascript
+// Simple ASCII
+"Hello" // 5 characters
+
+// Emoji (counted as code points)
+"👋" // 1 character
+"👋🏼" // 2 characters (base emoji + skin tone modifier)
+"👨‍👩‍👧‍👦" // 7 characters (multiple code points)
+
+// Accented characters
+"café" // 4 characters (when é is a single precomposed code point; 5 if it is stored as e + combining accent)
+"naïve" // 5 characters
+
+// Mixed content
+"Hello 👋" // 7 characters (5 letters + 1 space + 1 emoji)
+```
+
+**With grapheme counting enabled (`useGraphemeCounting: true`):**
+
+```javascript
+// Emoji (counted as grapheme clusters)
+"👋" // 1 character
+"👋🏼" // 1 character (emoji with skin tone modifier)
+"👨‍👩‍👧‍👦" // 1 character (family emoji sequence)
+```
+
+### Configuration
+
+#### Default behavior (code point counting)
+
+By default, the component uses code point counting to match Python's `len()` behavior:
+
+```javascript
+new CharacterCount($root, {
+  maxlength: 200
+})
+```
+
+This ensures server-side consistency with `nhsuk-frontend-jinja` validation.
+
+#### Enabling grapheme cluster counting
+
+To use grapheme cluster counting (only if your server also uses it):
+
+```javascript
+new CharacterCount($root, {
+  maxlength: 200,
+  useGraphemeCounting: true
+})
+```
+
+Or via data attribute:
+
+```html
+<div data-module="nhsuk-character-count"
+     data-maxlength="200"
+     data-use-grapheme-counting="true">
+```
+
+### Browser support
+
+The default code point counting works across all supported browsers and matches Python's `len()` behavior, ensuring consistency with server-side validation.
+
+When `useGraphemeCounting` is enabled, the component uses `Intl.Segmenter` when available (Baseline 2024 browsers: Chrome 87+, Firefox 125+, Safari 14.1+, Edge 87+) for accurate grapheme cluster counting. For browsers that support ES6 modules but not `Intl.Segmenter` (such as Safari 11–14.0, Firefox 60–124, and older Chrome/Edge versions), it automatically falls back to code point counting.
diff --git a/packages/nhsuk-frontend/src/nhsuk/components/character-count/character-count.jsdom.test.mjs b/packages/nhsuk-frontend/src/nhsuk/components/character-count/character-count.jsdom.test.mjs
@@ -279,4 +279,96 @@ describe('Character count: Format count message', () => {
       'You have 10,000 words too many'
     )
   })
+
+  describe('Unicode and grapheme cluster counting', () => {
+    let characterCount
+
+    /**
+     * Find the textarea rendered for the current test
+     */
+    const getTextarea = () =>
+      /** @type {HTMLTextAreaElement} */ (
+        document.querySelector('.nhsuk-js-character-count')
+      )
+
+    /**
+     * Put a value in the textarea and return the component's count for it
+     *
+     * @param {string} value - Text to enter
+     */
+    const countOf = (value) => {
+      const $textarea = getTextarea()
+      $textarea.value = value
+      return characterCount.count($textarea.value)
+    }
+
+    /**
+     * Put a value in the textarea, refresh the count message and return the
+     * visible status element
+     *
+     * @param {string} value - Text to enter
+     */
+    const statusFor = (value) => {
+      getTextarea().value = value
+      characterCount.updateCountMessage()
+      return document.querySelector('.nhsuk-character-count__status')
+    }
+
+    beforeEach(() => {
+      const markup = components.render(
+        'character-count',
+        examples['to configure in JavaScript']
+      )
+
+      document.body.outerHTML = outdent`
+        <body class="nhsuk-frontend-supported">
+          ${markup}
+        </body>
+      `
+
+      characterCount = new CharacterCount(
+        document.querySelector(`[data-module="${CharacterCount.moduleName}"]`),
+        { maxlength: 10, useGraphemeCounting: true }
+      )
+    })
+
+    it('counts emoji correctly', () => {
+      expect(countOf('👋👋👋')).toBe(3)
+    })
+
+    it('counts emoji with skin tones', () => {
+      expect(countOf('👋🏼👋🏿')).toBe(2)
+    })
+
+    it('counts emoji sequences', () => {
+      expect(countOf('👨‍👩‍👧‍👦')).toBe(1)
+    })
+
+    it('counts accented characters', () => {
+      expect(countOf('café')).toBe(4)
+    })
+
+    it('counts mixed text and emoji', () => {
+      expect(countOf('Hi 👋')).toBe(4)
+    })
+
+    it('counts non-Latin scripts', () => {
+      expect(countOf('你好')).toBe(2)
+    })
+
+    it('updates message with emoji', () => {
+      // Exactly at the limit of 10
+      const $status = statusFor('👋'.repeat(10))
+      expect($status).toHaveTextContent('You have 0 characters remaining')
+    })
+
+    it('shows error when over limit with emoji', () => {
+      // One over the limit of 10
+      const $status = statusFor('👋'.repeat(11))
+      expect($status).toHaveTextContent('You have 1 character too many')
+      expect($status).toHaveClass('nhsuk-error-message')
+    })
+  })
 })
diff --git a/packages/nhsuk-frontend/src/nhsuk/components/character-count/character-count.mjs b/packages/nhsuk-frontend/src/nhsuk/components/character-count/character-count.mjs
@@ -3,6 +3,7 @@ import {
   normaliseOptions,
   validateConfig
 } from '../../common/configuration/index.mjs'
+import { codePointCount, graphemeCount } from '../../common/grapheme-count.mjs'
 import { formatErrorMessage } from '../../common/index.mjs'
 import { ConfigurableComponent } from '../../configurable-component.mjs'
 import { ConfigError, ElementError } from '../../errors/index.mjs'
@@ -175,19 +176,20 @@ export class CharacterCount extends ConfigurableComponent {
   }
 
   /**
-   * Count the number of characters (or words, if `config.maxwords` is set)
-   * in the given text
+   * Count the number of words (if `config.maxwords` is set) or characters
+   * in the given text
+   *
+   * Characters are counted as grapheme clusters when
+   * `config.useGraphemeCounting` is enabled, otherwise as Unicode code points
    *
    * @param {string} text - The text to count the characters of
    * @returns {number} the number of characters (or words) in the text
    */
   count(text) {
     if (this.config.maxwords) {
       const tokens = text.match(/\S+/g) ?? [] // Matches consecutive non-whitespace chars
       return tokens.length
     }
 
-    return text.length
+    if (this.config.useGraphemeCounting) {
+      return graphemeCount(text)
+    }
+
+    return codePointCount(text)
   }
 
   /**
@@ -390,6 +392,7 @@ export class CharacterCount extends ConfigurableComponent {
     textareaDescriptionClass: 'nhsuk-character-count__message',
     visibleCountMessageClass: 'nhsuk-character-count__status',
     screenReaderCountMessageClass: 'nhsuk-character-count__sr-status',
+    useGraphemeCounting: false,
     i18n: {
       // Characters
       charactersUnderLimit: {
@@ -431,6 +434,7 @@ export class CharacterCount extends ConfigurableComponent {
       textareaDescriptionClass: { type: 'string' },
       visibleCountMessageClass: { type: 'string' },
       screenReaderCountMessageClass: { type: 'string' },
+      useGraphemeCounting: { type: 'boolean' },
       i18n: { type: 'object' }
     },
     anyOf: [
@@ -479,6 +483,10 @@ export function initCharacterCounts(options) {
  * @property {string} textareaDescriptionClass - Textarea description class
  * @property {string} visibleCountMessageClass - Visible count message class
  * @property {string} screenReaderCountMessageClass - Screen reader count message class
+ * @property {boolean} [useGraphemeCounting=false] - If true, uses grapheme cluster
+ *   counting (user-perceived characters) instead of code point counting. Defaults
+ *   to false to ensure consistency with Python's `len()` and server-side validation.
+ *   Only enable if your server-side validation also uses grapheme counting.
  * @property {CharacterCountTranslations} [i18n=CharacterCount.defaults.i18n] - Character count translations
  */
-Original file line number
+Diff line change
@@ Expand Up / @@ -163,6 +163,7 @@ export function formatErrorMessage(Component, message) { @@
     }
     export * from './closest-attribute-value.mjs'
+    export * from './grapheme-count.mjs'
     export * from './nhsuk-frontend-version.mjs'
     /**
@@ Expand Down @@