Fix 'overwite' typo in parameter name (#2006)

kit1980 · web-flow · commit 7c7b6409dff2 · 2022-12-12T19:50:24.000-05:00
diff --git a/torchtext/transforms.py b/torchtext/transforms.py
@@ -830,7 +830,7 @@ def __init__(
         if never_split is None:
             never_split = []
         self.bert_model = BERTEncoderPyBind(
-            get_asset_local_path(vocab_path, overwite=True), do_lower_case, strip_accents, never_split
+            get_asset_local_path(vocab_path, overwrite=True), do_lower_case, strip_accents, never_split
         )
         self._return_tokens = return_tokens
         self._vocab_path = vocab_path
@@ -929,7 +929,7 @@ class RegexTokenizer(Module):
 
     Caveats
         - The RE2 library does not support arbitrary lookahead or lookbehind assertions, nor does it support backreferences. Look at the `docs <https://swtch.com/~rsc/regexp/regexp3.html#caveats>`_ here for more info.
-        - The final tokenization step always uses spaces as seperators. To split strings based on a specific regex pattern, similar to Python's `re.split <https://docs.python.org/3/library/re.html#re.split>`_, a tuple of ``('<regex_pattern>', ' ')`` can be provided.
+        - The final tokenization step always uses spaces as separators. To split strings based on a specific regex pattern, similar to Python's `re.split <https://docs.python.org/3/library/re.html#re.split>`_, a tuple of ``('<regex_pattern>', ' ')`` can be provided.
 
     Example
         Regex tokenization based on ``(patterns, replacements)`` list.
@@ -998,7 +998,7 @@ def bytes_to_unicode():
     The reversible bpe codes work on unicode strings.
     This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
     When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-    This is a signficant percentage of your normal, say, 32K bpe vocab.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
     To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
     And avoids mapping to whitespace/control characters the bpe code barfs on.
     """
diff --git a/torchtext/utils.py b/torchtext/utils.py
@@ -207,8 +207,8 @@ def _log_class_usage(klass):
     torch._C._log_api_usage_once(identifier)
 
 
-def get_asset_local_path(asset_path: str, overwite=False) -> str:
-    """Get local path for assets. Download if path does not exost locally
+def get_asset_local_path(asset_path: str, overwrite=False) -> str:
+    """Get local path for assets. Download if path does not exist locally
     Args:
         asset_path: Local path to asset or remote URL
         overwrite: Indicate whether to overwrite the file when downloading from URL (default: False)
@@ -224,5 +224,5 @@ def get_asset_local_path(asset_path: str, overwite=False) -> str:
     if os.path.exists(asset_path):
         local_path = asset_path
     else:
-        local_path = download_from_url(url=asset_path, root=_CACHE_DIR, overwrite=overwite)
+        local_path = download_from_url(url=asset_path, root=_CACHE_DIR, overwrite=overwrite)
     return local_path