Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 7c7b640

Browse files
authored
Fix 'overwite' typo in parameter name (#2006)
1 parent 651a033 commit 7c7b640

File tree

2 files changed: 6 additions and 6 deletions

2 files changed: 6 additions and 6 deletions

torchtext/transforms.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -830,7 +830,7 @@ def __init__(
830830
if never_split is None:
831831
never_split = []
832832
self.bert_model = BERTEncoderPyBind(
833-
get_asset_local_path(vocab_path, overwite=True), do_lower_case, strip_accents, never_split
833+
get_asset_local_path(vocab_path, overwrite=True), do_lower_case, strip_accents, never_split
834834
)
835835
self._return_tokens = return_tokens
836836
self._vocab_path = vocab_path
@@ -929,7 +929,7 @@ class RegexTokenizer(Module):
929929
930930
Caveats
931931
- The RE2 library does not support arbitrary lookahead or lookbehind assertions, nor does it support backreferences. Look at the `docs <https://swtch.com/~rsc/regexp/regexp3.html#caveats>`_ here for more info.
932-
- The final tokenization step always uses spaces as seperators. To split strings based on a specific regex pattern, similar to Python's `re.split <https://docs.python.org/3/library/re.html#re.split>`_, a tuple of ``('<regex_pattern>', ' ')`` can be provided.
932+
- The final tokenization step always uses spaces as separators. To split strings based on a specific regex pattern, similar to Python's `re.split <https://docs.python.org/3/library/re.html#re.split>`_, a tuple of ``('<regex_pattern>', ' ')`` can be provided.
933933
934934
Example
935935
Regex tokenization based on ``(patterns, replacements)`` list.
@@ -998,7 +998,7 @@ def bytes_to_unicode():
998998
The reversible bpe codes work on unicode strings.
999999
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
10001000
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
1001-
This is a signficant percentage of your normal, say, 32K bpe vocab.
1001+
This is a significant percentage of your normal, say, 32K bpe vocab.
10021002
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
10031003
And avoids mapping to whitespace/control characters the bpe code barfs on.
10041004
"""

torchtext/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ def _log_class_usage(klass):
207207
torch._C._log_api_usage_once(identifier)
208208

209209

210-
def get_asset_local_path(asset_path: str, overwite=False) -> str:
211-
"""Get local path for assets. Download if path does not exost locally
210+
def get_asset_local_path(asset_path: str, overwrite=False) -> str:
211+
"""Get local path for assets. Download if path does not exist locally
212212
Args:
213213
asset_path: Local path to asset or remote URL
214214
overwrite: Indicate whether to overwrite the file when downloading from URL (default: False)
@@ -224,5 +224,5 @@ def get_asset_local_path(asset_path: str, overwite=False) -> str:
224224
if os.path.exists(asset_path):
225225
local_path = asset_path
226226
else:
227-
local_path = download_from_url(url=asset_path, root=_CACHE_DIR, overwrite=overwite)
227+
local_path = download_from_url(url=asset_path, root=_CACHE_DIR, overwrite=overwrite)
228228
return local_path

0 commit comments

Comments (0)