Skip to content

Commit e781f9c

Browse files
divideby0 and claude committed
fix: preserve periods in version numbers with kebab_filenames mode
Fixes #423 Implements maintainer feedback to preserve periods in version numbers (e.g., "2.0.0") instead of converting them to hyphens. Uses mimetypes.guess_type() to detect real file extensions and avoid misinterpreting periods as extensions. Changes: - Add has_valid_file_extension() helper using mimetypes.guess_type() - Modify generate_permalink() to conditionally call os.path.splitext() only when real file extensions are detected - Update regex patterns to preserve periods in both CJK and non-CJK paths - Update 19 test cases to expect period preservation Examples: - "Test 3.0 Version" → test-3.0-version.md (was: test-3-0-version.md) - "Version 1.2.3 Release" → version-1.2.3-release.md 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]> Signed-off-by: Cedric Hurst <[email protected]>
1 parent 099c334 commit e781f9c

File tree

3 files changed

+448
-9
lines changed

3 files changed

+448
-9
lines changed

src/basic_memory/schemas/base.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,38 @@
2121
from annotated_types import MinLen, MaxLen
2222
from dateparser import parse
2323

24-
from pydantic import BaseModel, BeforeValidator, Field, model_validator
24+
from pydantic import BaseModel, BeforeValidator, Field, model_validator, computed_field
2525

2626
from basic_memory.config import ConfigManager
2727
from basic_memory.file_utils import sanitize_for_filename, sanitize_for_folder
2828
from basic_memory.utils import generate_permalink
2929

3030

31+
def has_valid_file_extension(filename: str) -> bool:
    """Check if a filename has a valid file extension recognized by mimetypes.

    This is used to determine whether to split the extension when processing
    titles in kebab_filenames mode. Prevents treating periods in version numbers
    or decimals as file extensions.

    A suffix counts as recognized when ``mimetypes.guess_type()`` reports either
    a MIME type or a content encoding for it. Encoding-only suffixes such as
    ``.gz`` yield ``(None, "gzip")``, so checking the MIME type alone would
    wrongly report them as "no extension".

    Args:
        filename: The filename to check

    Returns:
        True if the filename has a recognized file extension, False otherwise

    Examples:
        >>> has_valid_file_extension("document.md")
        True
        >>> has_valid_file_extension("Version 2.0.0")
        False
        >>> has_valid_file_extension("image.png")
        True
        >>> has_valid_file_extension("backup.gz")
        True
    """
    # guess_type() returns a (type, encoding) pair; either one being set means
    # the trailing suffix is a known extension rather than an arbitrary period.
    mime_type, encoding = mimetypes.guess_type(filename)
    return mime_type is not None or encoding is not None
54+
55+
3156
def to_snake_case(name: str) -> str:
3257
"""Convert a string to snake_case.
3358
@@ -232,12 +257,17 @@ def safe_title(self) -> str:
232257
use_kebab_case = app_config.kebab_filenames
233258

234259
if use_kebab_case:
235-
fixed_title = generate_permalink(file_path=fixed_title, split_extension=False)
260+
# Convert to kebab-case: lowercase with hyphens, preserving periods in version numbers
261+
# generate_permalink() uses mimetypes to detect real file extensions and only splits
262+
# them off, avoiding misinterpreting periods in version numbers as extensions
263+
has_extension = has_valid_file_extension(fixed_title)
264+
fixed_title = generate_permalink(file_path=fixed_title, split_extension=has_extension)
236265

237266
return fixed_title
238267

268+
@computed_field
239269
@property
240-
def file_path(self):
270+
def file_path(self) -> str:
241271
"""Get the file path for this entity based on its permalink."""
242272
safe_title = self.safe_title
243273
if self.content_type == "text/markdown":

src/basic_memory/utils.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,14 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
7676
7777
Args:
7878
file_path: Original file path (str, Path, or PathLike)
79+
split_extension: Whether to split off and discard file extensions.
80+
When True, uses mimetypes to detect real extensions.
81+
When False, preserves all content including periods.
7982
8083
Returns:
8184
Normalized permalink that matches validation rules. Converts spaces and underscores
8285
to hyphens for consistency. Preserves non-ASCII characters like Chinese.
86+
Preserves periods in version numbers (e.g., "2.0.0") when they're not real file extensions.
8387
8488
Examples:
8589
>>> generate_permalink("docs/My Feature.md")
@@ -90,12 +94,25 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
9094
'design/unified-model-refactor'
9195
>>> generate_permalink("中文/测试文档.md")
9296
'中文/测试文档'
97+
>>> generate_permalink("Version 2.0.0")
98+
'version-2.0.0'
9399
"""
94100
# Convert Path to string if needed
95101
path_str = Path(str(file_path)).as_posix()
96102

97-
# Remove extension (for now, possibly)
98-
(base, extension) = os.path.splitext(path_str)
103+
# Only split extension if there's a real file extension
104+
# Use mimetypes to detect real extensions, avoiding misinterpreting periods in version numbers
105+
import mimetypes
106+
mime_type, _ = mimetypes.guess_type(path_str)
107+
has_real_extension = mime_type is not None
108+
109+
if has_real_extension and split_extension:
110+
# Real file extension detected - split it off
111+
(base, extension) = os.path.splitext(path_str)
112+
else:
113+
# No real extension or split_extension=False - process the whole string
114+
base = path_str
115+
extension = ""
99116

100117
# Check if we have CJK characters that should be preserved
101118
# CJK ranges: \u4e00-\u9fff (CJK Unified Ideographs), \u3000-\u303f (CJK symbols),
@@ -147,9 +164,9 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
147164
# Remove apostrophes entirely (don't replace with hyphens)
148165
text_no_apostrophes = text_with_hyphens.replace("'", "")
149166

150-
# Replace unsafe chars with hyphens, but preserve CJK characters
167+
# Replace unsafe chars with hyphens, but preserve CJK characters and periods
151168
clean_text = re.sub(
152-
r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-]", "-", text_no_apostrophes
169+
r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-\.]", "-", text_no_apostrophes
153170
)
154171
else:
155172
# Original ASCII-only processing for backward compatibility
@@ -168,8 +185,8 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
168185
# Remove apostrophes entirely (don't replace with hyphens)
169186
text_no_apostrophes = text_with_hyphens.replace("'", "")
170187

171-
# Replace remaining invalid chars with hyphens
172-
clean_text = re.sub(r"[^a-z0-9/\-]", "-", text_no_apostrophes)
188+
# Replace remaining invalid chars with hyphens, preserving periods
189+
clean_text = re.sub(r"[^a-z0-9/\-\.]", "-", text_no_apostrophes)
173190

174191
# Collapse multiple hyphens
175192
clean_text = re.sub(r"-+", "-", clean_text)

0 commit comments

Comments
 (0)