Ensured that memoization does not overwrite chunk()'s function signature.

umarbutler · umarbutler · commit aa6142f0fe31 · 2024-03-11T15:28:41.000+11:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 ## Changelog 🔄
 All notable changes to `semchunk` will be documented here. This project adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.3] - 2024-03-11
+### Fixed
+- Ensured that memoization does not overwrite `chunk()`'s function signature.
+
 ## [0.2.2] - 2024-02-05
 ### Fixed
 - Ensured that the `memoize` argument is passed back to `chunk()` in recursive calls.
@@ -36,6 +40,7 @@ All notable changes to `semchunk` will be documented here. This project adheres
 ### Added
 - Added the `chunk()` function, which splits text into semantically meaningful chunks of a specified size as determined by a provided token counter.
 
+[0.2.3]: https://github.com/umarbutler/semchunk/compare/v0.2.2...v0.2.3
 [0.2.2]: https://github.com/umarbutler/semchunk/compare/v0.2.1...v0.2.2
 [0.2.1]: https://github.com/umarbutler/semchunk/compare/v0.2.0...v0.2.1
 [0.2.0]: https://github.com/umarbutler/semchunk/compare/v0.1.2...v0.2.0
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "semchunk"
-version = "0.2.2"
+version = "0.2.3"
 authors = [
   {name="Umar Butler", email="umar@umar.au"},
 ]
diff --git a/src/semchunk/semchunk.py b/src/semchunk/semchunk.py
@@ -1,5 +1,5 @@
 import re
-from functools import cache
+from functools import cache, wraps
 
 _memoised_token_counters = {}
 """A map of token counters to their memoised versions."""
@@ -45,7 +45,6 @@ def _split_text(text: str) -> tuple[str, bool, list[str]]:
     # Return the splitter and the split text.
     return splitter, splitter_is_whitespace, text.split(splitter)
 
-@cache
 def chunk(text: str, chunk_size: int, token_counter: callable, memoize: bool=True, _recursion_depth: int = 0) -> list[str]:
     """Split text into semantically meaningful chunks of a specified size as determined by the provided token counter.
 
@@ -113,4 +112,6 @@ def chunk(text: str, chunk_size: int, token_counter: callable, memoize: bool=Tru
     if not _recursion_depth:
         chunks = list(filter(None, chunks))
     
-    return chunks
+    return chunks
+
+chunk = wraps(chunk)(cache(chunk))

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ build-backend = "hatchling.build"`
`4`	`4`
`5`	`5`	`[project]`
`6`	`6`	`name = "semchunk"`
`7`		`-version = "0.2.2"`
	`7`	`+version = "0.2.3"`
`8`	`8`	`authors = [`
`9`	`9`	`{name="Umar Butler", email="umar@umar.au"},`
`10`	`10`	`]`