Fixed typo in error messages in chunkerify().

umarbutler · umarbutler · commit 3b35b1e3c93f · 2024-05-18T22:09:25.000+10:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 ## Changelog 🔄
 All notable changes to `semchunk` will be documented here. This project adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.1] - 2024-05-18
+### Fixed
+- Fixed typo in error messages in `chunkerify()` where it was referred to as `make_chunker()`.
+
 ## [0.3.0] - 2024-05-18
 ### Added
 - Introduced the `chunkerify()` function, which constructs a chunker from a tokenizer or token counter that can be reused and can also chunk multiple texts in a single call. The resulting chunker speeds up chunking by 40.4% thanks, in large part, to a token counter that avoids having to count the number of tokens in a text when the number of characters in the text exceeds a certain threshold, courtesy of [@R0bk](https://github.com/R0bk) ([#3](https://github.com/umarbutler/semchunk/pull/3)) ([337a186](https://github.com/umarbutler/semchunk/pull/3/commits/337a18615f991076b076262288b0408cb162b48c)).
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "semchunk"
-version = "0.3.0"
+version = "0.3.1"
 authors = [
   {name="Umar Butler", email="umar@umar.au"},
 ]
diff --git a/src/semchunk/semchunk.py b/src/semchunk/semchunk.py
@@ -178,7 +178,7 @@ def chunkerify(
                 tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_or_token_counter)
             
             except Exception:
-                raise ValueError(f'"{tokenizer_or_token_counter}" was provided to `semchunk.make_chunker` as the name of a tokenizer but neither `tiktoken` nor `transformers` have a tokenizer by that name. Perhaps they are not installed or maybe there is a typo in that name?')
+                raise ValueError(f'"{tokenizer_or_token_counter}" was provided to `semchunk.chunkerify` as the name of a tokenizer but neither `tiktoken` nor `transformers` have a tokenizer by that name. Perhaps they are not installed or maybe there is a typo in that name?')
         
         tokenizer_or_token_counter = tokenizer
     
@@ -206,7 +206,7 @@ def chunkerify(
                     chunk_size -= len(tokenizer_or_token_counter.encode(''))
         
         else:
-            raise ValueError("Your desired chunk size was not passed to `semchunk.make_chunker` and the provided tokenizer either lacks an attribute named 'model_max_length' or that attribute is not an integer. Either specify a chunk size or provide a tokenizer that has a 'model_max_length' attribute that is an integer.")
+            raise ValueError("Your desired chunk size was not passed to `semchunk.chunkerify` and the provided tokenizer either lacks an attribute named 'model_max_length' or that attribute is not an integer. Either specify a chunk size or provide a tokenizer that has a 'model_max_length' attribute that is an integer.")
     
     # If we have been given a tokenizer, construct a token counter from it.
     if hasattr(tokenizer_or_token_counter, 'encode'):

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ build-backend = "hatchling.build"`
`4`	`4`
`5`	`5`	`[project]`
`6`	`6`	`name = "semchunk"`
`7`		`-version = "0.3.0"`
	`7`	`+version = "0.3.1"`
`8`	`8`	`authors = [`
`9`	`9`	`{name="Umar Butler", email="umar@umar.au"},`
`10`	`10`	`]`