Skip to content

Commit b1b3465

Browse files
committed
use langcodes for locale validation
1 parent 9dab7b6 commit b1b3465

File tree

3 files changed

+95
-34
lines changed

3 files changed

+95
-34
lines changed

py/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ jaraco.context==6.0.1
2222
jaraco.functools==4.3.0
2323
jeepney==0.9.0
2424
keyring==25.6.0
25+
langcodes==3.5.0
2526
markdown-it-py==4.0.0
2627
mdurl==0.1.2
2728
more-itertools==10.8.0

py/requirements_lock.txt

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -388,14 +388,88 @@ jeepney==0.9.0 \
388388
--hash=sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732
389389
# via
390390
# -r py/requirements.txt
391-
# keyring
392391
# secretstorage
393392
keyring==25.6.0 \
394393
--hash=sha256:0b39998aa941431eb3d9b0d4b2460bc773b9df6fed7621c2dfb291a7e0187a66 \
395394
--hash=sha256:552a3f7af126ece7ed5c89753650eec89c7eaae8617d0aa4d9ad2b75111266bd
396395
# via
397396
# -r py/requirements.txt
398397
# twine
398+
langcodes==3.5.0 \
399+
--hash=sha256:1eef8168d07e51e131a2497ffecad4b663f6208e7c3ae3b8dc15c51734a6f801 \
400+
--hash=sha256:853c69d1a35e0e13da2f427bb68fb2fa4a8f4fb899e0c62ad8df8d073dcfed33
401+
# via -r py/requirements.txt
402+
language-data==1.3.0 \
403+
--hash=sha256:7600ef8aa39555145d06c89f0c324bf7dab834ea0b0a439d8243762e3ebad7ec \
404+
--hash=sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf
405+
# via langcodes
406+
marisa-trie==1.3.1 \
407+
--hash=sha256:076731f79f8603cb3216cb6e5bbbc56536c89f63f175ad47014219ecb01e5996 \
408+
--hash=sha256:0b9816ab993001a7854b02a7daec228892f35bd5ab0ac493bacbd1b80baec9f1 \
409+
--hash=sha256:0c2bc6bee737f4d47fce48c5b03a7bd3214ef2d83eb5c9f84210091370a5f195 \
410+
--hash=sha256:0dcd42774e367ceb423c211a4fc8e7ce586acfaf0929c9c06d98002112075239 \
411+
--hash=sha256:0e6f3b45def6ff23e254eeaa9079267004f0069d0a34eba30a620780caa4f2cb \
412+
--hash=sha256:137010598d8cebc53dbfb7caf59bde96c33a6af555e3e1bdbf30269b6a157e1e \
413+
--hash=sha256:2f7c10f69cbc3e6c7d715ec9cb0c270182ea2496063bebeda873f4aa83fd9910 \
414+
--hash=sha256:3715d779561699471edde70975e07b1de7dddb2816735d40ed16be4b32054188 \
415+
--hash=sha256:3834304fdeaa1c9b73596ad5a6c01a44fc19c13c115194704b85f7fbdf0a7b8e \
416+
--hash=sha256:389721481c14a92fa042e4b91ae065bff13e2bc567c85a10aa9d9de80aaa8622 \
417+
--hash=sha256:3a96ef3e461ecc85ec7d2233ddc449ff5a3fbdc520caea752bc5bc8faa975231 \
418+
--hash=sha256:3e2a0e1be95237981bd375a388f44b33d69ea5669a2f79fea038e45fff326595 \
419+
--hash=sha256:3e431f9c80ee1850b2a406770acf52c058b97a27968a0ed6aca45c2614d64c9f \
420+
--hash=sha256:47631614c5243ed7d15ae0af8245fcc0599f5b7921fae2a4ae992afb27c9afbb \
421+
--hash=sha256:52d1764906befef91886e3bff374d8090c9716822bd56b70e07aa697188090b7 \
422+
--hash=sha256:5370f9ef6c008e502537cc1ff518c80ddf749367ce90179efa0e7f6275903a76 \
423+
--hash=sha256:56043cf908ddf3d7364498085dbc2855d4ea8969aff3bf2439a79482a79e68e2 \
424+
--hash=sha256:5a6abc9573a6a45d09548fde136dbcd4260b8c56f8dff443eaa565352d7cca59 \
425+
--hash=sha256:5b7c1e7fa6c3b855e8cfbabf38454d7decbaba1c567d0cd58880d033c6b363bd \
426+
--hash=sha256:5ef045f694ef66079b4e00c4c9063a00183d6af7d1ff643de6ea5c3b0d9af01b \
427+
--hash=sha256:68678816818efcd4a1787b557af81f215b989ec88680a86c85c34c914d413690 \
428+
--hash=sha256:6cac19952e0e258ded765737d1fb11704fe81bf4f27526638a5d44496f329235 \
429+
--hash=sha256:70b4c96f9119cfeb4dc6a0cf4afc9f92f0b002cde225bcd910915d976c78e66a \
430+
--hash=sha256:7e957aa4251a8e70b9fe02a16b2d190f18787902da563cb7ba865508b8e8fb04 \
431+
--hash=sha256:82de2de90488d0fbbf74cf9f20e1afd62e320693b88f5e9565fc80b28f5bbad3 \
432+
--hash=sha256:83a3748088d117a9b15d8981c947df9e4f56eb2e4b5456ae34fe1f83666c9185 \
433+
--hash=sha256:83efc045fc58ca04c91a96c9b894d8a19ac6553677a76f96df01ff9f0405f53d \
434+
--hash=sha256:8c8b2386d2d22c57880ed20a913ceca86363765623175671137484a7d223f07a \
435+
--hash=sha256:8f81344d212cb41992340b0b8a67e375f44da90590b884204fd3fa5e02107df2 \
436+
--hash=sha256:954fef9185f8a79441b4e433695116636bf66402945cfee404f8983bafa59788 \
437+
--hash=sha256:9651daa1fdc471df5a5fa6a4833d3b01e76ac512eea141a5995681aebac5555f \
438+
--hash=sha256:9688c7b45f744366a4ef661e399f24636ebe440d315ab35d768676c59c613186 \
439+
--hash=sha256:97107fd12f30e4f8fea97790343a2d2d9a79d93697fe14e1b6f6363c984ff85b \
440+
--hash=sha256:9868b7a8e0f648d09ffe25ac29511e6e208cc5fb0d156c295385f9d5dc2a138e \
441+
--hash=sha256:986eaf35a7f63c878280609ecd37edf8a074f7601c199acfec81d03f1ee9a39a \
442+
--hash=sha256:99a00cab4cf9643a87977c87a5c8961aa44fff8d5dd46e00250135f686e7dedf \
443+
--hash=sha256:9c56001badaf1779afae5c24b7ab85938644ab8ef3c5fd438ab5d49621b84482 \
444+
--hash=sha256:9dc61fb8f8993589544f6df268229c6cf0a56ad4ed3e8585a9cd23c5ad79527b \
445+
--hash=sha256:9de573d933db4753a50af891bcb3ffbfe14e200406214c223aa5dfe2163f316d \
446+
--hash=sha256:9e467e13971c64db6aed8afe4c2a131c3f73f048bec3f788a6141216acda598d \
447+
--hash=sha256:9e6496bbad3068e3bbbb934b1e1307bf1a9cb4609f9ec47b57e8ea37f1b5ee40 \
448+
--hash=sha256:9f92d3577c72d5a97af5c8e3d98247b79c8ccfb64ebf611311dcf631b11e5604 \
449+
--hash=sha256:a1c6990961d1177f6d8fdf7b610fa2e7c0c02743a090d173f6dfa9dc9231c73c \
450+
--hash=sha256:a5a0a58ffe2a7eb3f870214c6df8f9a43ce768bd8fed883e6ba8c77645666b63 \
451+
--hash=sha256:a7416f1a084eb889c5792c57317875aeaa86abfe0bdc6f167712cebcec1d36ee \
452+
--hash=sha256:a83f5f7ae3494e0cc25211296252b1b86901c788ed82c83adda19d0c98f828d6 \
453+
--hash=sha256:a850b151bd1e3a5d9afef113adc22727d696603659d575d7e84f994bd8d04bf1 \
454+
--hash=sha256:ad82ab8a58562cf69e6b786debcc7638b28df12f9f1c7bcffb07efb5c1f09cbd \
455+
--hash=sha256:b173ec46d521308f7c97d96d6e05cf2088e0548f82544ec9a8656af65593304d \
456+
--hash=sha256:bf9f2b97fcfd5e2dbb0090d0664023872dcde990df0b545eca8d0ce95795a409 \
457+
--hash=sha256:c12b44c190deb0d67655021da1f2d0a7d61a257bf844101cf982e68ed344f28d \
458+
--hash=sha256:c6571462417cda2239b1ade86ceaf3852da9b52c6286046e87d404afc6da20a7 \
459+
--hash=sha256:c785fd6dae9daa6825734b7b494cdac972f958be1f9cb3fb1f32be8598d2b936 \
460+
--hash=sha256:c7a33506d0451112911c69f38d55da3e0e050f2be0ea4e5176865cf03baf26a9 \
461+
--hash=sha256:c89df75aefe1ad7e613340790130f1badc5926bcfa66a6b3c9471071002956a5 \
462+
--hash=sha256:ca644534f15f85bba14c412afc17de07531e79a766ce85b8dbf3f8b6e7758f20 \
463+
--hash=sha256:cbd28f95d5f30d9a7af6130869568e75bfd7ef2e0adfb1480f1f44480f5d3603 \
464+
--hash=sha256:d0f87bdf660f01e88ab3a507955697b2e3284065afa0b94fc9e77d6ad153ed5e \
465+
--hash=sha256:d4bd41a6e73c0d0adafe4de449b6d35530a4ce6a836a6ee839baf117785ecfd7 \
466+
--hash=sha256:d8d5e686db0ae758837ed29b3b742afb994d1a01ce10977eabd3490f16b5c9f9 \
467+
--hash=sha256:e5888b269e790356ce4525f3e8df1fe866d1497b7d7fb7548cfec883cb985288 \
468+
--hash=sha256:ec633e108f277f2b7f4671d933a909f39bba549910bf103e2940b87a14da2783 \
469+
--hash=sha256:ecdb19d33b26738a32602ef432b06cc6deeca4b498ce67ba8e5e39c8a7c19745 \
470+
--hash=sha256:ee428575377e29c636f2b4b3b0488875dcea310c6c5b3412ec4ef997f7bb37cc \
471+
--hash=sha256:f4bae4f920f2a1082eaf766c1883df7da84abdf333bafa15b8717c10416a615e
472+
# via language-data
399473
markdown-it-py==4.0.0 \
400474
--hash=sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147 \
401475
--hash=sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3
@@ -678,9 +752,7 @@ rich==14.1.0 \
678752
secretstorage==3.4.0 \
679753
--hash=sha256:0e3b6265c2c63509fb7415717607e4b2c9ab767b7f344a57473b779ca13bd02e \
680754
--hash=sha256:c46e216d6815aff8a8a18706a2fbfd8d53fcbb0dce99301881687a1b0289ef7c
681-
# via
682-
# -r py/requirements.txt
683-
# keyring
755+
# via -r py/requirements.txt
684756
sniffio==1.3.1 \
685757
--hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \
686758
--hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc

py/selenium/webdriver/common/bidi/emulation.py

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
import re
1918
from typing import Any, Optional, Union
2019

20+
from langcodes import standardize_tag, tag_is_valid
21+
2122
from selenium.webdriver.common.bidi.common import command_builder
2223

2324

@@ -164,6 +165,20 @@ def to_dict(self) -> dict[str, str]:
164165
return {"type": self.type}
165166

166167

168+
def _is_valid_language_tag(locale: Optional[str]) -> Optional[str]:
    """Validate and normalize a BCP 47 language tag.

    Note that, despite the ``is_valid`` name, this helper does not return a
    boolean: it returns the normalized tag on success and raises on failure.

    Parameters:
    -----------
        locale: The language tag to validate, or None.

    Returns:
    --------
        None when ``locale`` is None; otherwise the value returned by
        ``langcodes.standardize_tag(locale)``.

    Raises:
    -------
        ValueError: If ``langcodes.tag_is_valid(locale)`` reports the tag as
            invalid.
    """
    # None means "no override"; there is nothing to validate or normalize.
    if locale is None:
        return None

    if not tag_is_valid(locale):
        raise ValueError(f"Invalid locale: {locale}")

    # Canonicalization / normalization
    return standardize_tag(locale)
180+
181+
167182
class Emulation:
168183
"""
169184
BiDi implementation of the emulation module.
@@ -227,7 +242,7 @@ def set_locale_override(
227242
228243
Parameters:
229244
-----------
230-
locale: Locale string (language tag) to emulate, or None to clear override.
245+
locale: Locale string as per BCP 47, or None to clear override.
231246
contexts: List of browsing context IDs to apply the override to.
232247
user_contexts: List of user context IDs to apply the override to.
233248
@@ -242,7 +257,7 @@ def set_locale_override(
242257
if contexts is None and user_contexts is None:
243258
raise ValueError("Must specify either contexts or userContexts")
244259

245-
if locale is not None and not self._is_valid_language_tag(locale):
260+
if locale is not None and not _is_valid_language_tag(locale):
246261
raise ValueError(f"Invalid language tag: {locale}")
247262

248263
params: dict[str, Any] = {"locale": locale}
@@ -253,30 +268,3 @@ def set_locale_override(
253268
params["userContexts"] = user_contexts
254269

255270
self.conn.execute(command_builder("emulation.setLocaleOverride", params))
256-
257-
def _is_valid_language_tag(self, tag: str) -> bool:
    """Check if a language tag is structurally valid according to BCP 47.

    This is a simplified validation that covers the most common cases.
    Full BCP 47 validation would be more complex (for example, it does not
    check subtags against the IANA registry or handle grandfathered tags).

    Parameters:
    -----------
        tag: The language tag to validate.

    Returns:
    --------
        True if the tag is structurally valid, False otherwise. Empty
        strings and non-string values are never valid.
    """
    if not tag or not isinstance(tag, str):
        return False

    # Basic BCP 47 language tag pattern
    # Format: language[-script][-region][-variant][-extension][-privateuse]
    # Each optional subtag keeps its alternatives inside a group that follows
    # the hyphen; otherwise "|" would bind looser than the leading "-" and
    # e.g. "en123" would be accepted while "es-419" would be rejected.
    pattern = (
        r"[a-z]{2,3}"  # language: 2-3 lowercase letters
        r"(?:-[A-Z][a-z]{3})?"  # script: 4 letters with first uppercase
        r"(?:-(?:[A-Z]{2}|[0-9]{3}))?"  # region: 2 uppercase letters or 3 digits
        # variant: 5-8 alphanumeric characters or 4 characters starting with digit
        r"(?:-(?:[a-zA-Z0-9]{5,8}|[0-9][a-zA-Z0-9]{3}))*"
        r"(?:-[a-wy-zA-WY-Z0-9](?:-[a-zA-Z0-9]{2,8})+)*"  # extension: singleton + subtags
        r"(?:-x(?:-[a-zA-Z0-9]{1,8})+)?"  # private use: "x" + subtags
    )

    # fullmatch (rather than match with "$") so a trailing newline is rejected.
    return re.fullmatch(pattern, tag) is not None

0 commit comments

Comments
 (0)