Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions src/basic_memory/schemas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,38 @@
from annotated_types import MinLen, MaxLen
from dateparser import parse

from pydantic import BaseModel, BeforeValidator, Field, model_validator
from pydantic import BaseModel, BeforeValidator, Field, model_validator, computed_field

from basic_memory.config import ConfigManager
from basic_memory.file_utils import sanitize_for_filename, sanitize_for_folder
from basic_memory.utils import generate_permalink


def has_valid_file_extension(filename: str) -> bool:
    """Check if a filename has a valid file extension recognized by mimetypes.

    This is used to determine whether to split the extension when processing
    titles in kebab_filenames mode. Prevents treating periods in version numbers
    or decimals as file extensions.

    Args:
        filename: The filename to check

    Returns:
        True if the filename has a recognized file extension, False otherwise

    Examples:
        >>> has_valid_file_extension("document.md")
        True
        >>> has_valid_file_extension("Version 2.0.0")
        False
        >>> has_valid_file_extension("image.png")
        True
    """
    # guess_type() returns a (mime_type, encoding) pair. Only the MIME type is
    # consulted: a trailing ".0" (as in "Version 2.0.0") maps to no known type,
    # so such titles are reported as having no extension. A suffix that only
    # names an encoding (e.g. ".gz" on its own) likewise yields no MIME type.
    mime_type, _ = mimetypes.guess_type(filename)
    return mime_type is not None


def to_snake_case(name: str) -> str:
"""Convert a string to snake_case.
Expand Down Expand Up @@ -232,12 +257,17 @@ def safe_title(self) -> str:
use_kebab_case = app_config.kebab_filenames

if use_kebab_case:
fixed_title = generate_permalink(file_path=fixed_title, split_extension=False)
# Convert to kebab-case: lowercase with hyphens, preserving periods in version numbers
# generate_permalink() uses mimetypes to detect real file extensions and only splits
# them off, avoiding misinterpreting periods in version numbers as extensions
has_extension = has_valid_file_extension(fixed_title)
fixed_title = generate_permalink(file_path=fixed_title, split_extension=has_extension)

return fixed_title

@computed_field
@property
def file_path(self):
def file_path(self) -> str:
"""Get the file path for this entity based on its permalink."""
safe_title = self.safe_title
if self.content_type == "text/markdown":
Expand Down
29 changes: 23 additions & 6 deletions src/basic_memory/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,14 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b

Args:
file_path: Original file path (str, Path, or PathLike)
split_extension: Whether to split off and discard file extensions.
When True, uses mimetypes to detect real extensions.
When False, preserves all content including periods.

Returns:
Normalized permalink that matches validation rules. Converts spaces and underscores
to hyphens for consistency. Preserves non-ASCII characters like Chinese.
Preserves periods in version numbers (e.g., "2.0.0") when they're not real file extensions.

Examples:
>>> generate_permalink("docs/My Feature.md")
Expand All @@ -90,12 +94,25 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
'design/unified-model-refactor'
>>> generate_permalink("中文/测试文档.md")
'中文/测试文档'
>>> generate_permalink("Version 2.0.0")
'version-2.0.0'
"""
# Convert Path to string if needed
path_str = Path(str(file_path)).as_posix()

# Remove extension (for now, possibly)
(base, extension) = os.path.splitext(path_str)
# Only split extension if there's a real file extension
# Use mimetypes to detect real extensions, avoiding misinterpreting periods in version numbers
import mimetypes
mime_type, _ = mimetypes.guess_type(path_str)
has_real_extension = mime_type is not None

if has_real_extension and split_extension:
# Real file extension detected - split it off
(base, extension) = os.path.splitext(path_str)
else:
# No real extension or split_extension=False - process the whole string
base = path_str
extension = ""

# Check if we have CJK characters that should be preserved
# CJK ranges: \u4e00-\u9fff (CJK Unified Ideographs), \u3000-\u303f (CJK symbols),
Expand Down Expand Up @@ -147,9 +164,9 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
# Remove apostrophes entirely (don't replace with hyphens)
text_no_apostrophes = text_with_hyphens.replace("'", "")

# Replace unsafe chars with hyphens, but preserve CJK characters
# Replace unsafe chars with hyphens, but preserve CJK characters and periods
clean_text = re.sub(
r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-]", "-", text_no_apostrophes
r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-\.]", "-", text_no_apostrophes
)
else:
# Original ASCII-only processing for backward compatibility
Expand All @@ -168,8 +185,8 @@ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: b
# Remove apostrophes entirely (don't replace with hyphens)
text_no_apostrophes = text_with_hyphens.replace("'", "")

# Replace remaining invalid chars with hyphens
clean_text = re.sub(r"[^a-z0-9/\-]", "-", text_no_apostrophes)
# Replace remaining invalid chars with hyphens, preserving periods
clean_text = re.sub(r"[^a-z0-9/\-\.]", "-", text_no_apostrophes)

# Collapse multiple hyphens
clean_text = re.sub(r"-+", "-", clean_text)
Expand Down
Loading