Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions python/cocoindex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
from . import functions, sources, targets, cli, utils

from . import targets as storages # Deprecated: Use targets instead
from .validation import (
NamingError,
validate_field_name,
validate_flow_name,
validate_target_name,
)

from .auth_registry import AuthEntryReference, add_auth_entry, add_transient_auth_entry
from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
Expand Down Expand Up @@ -61,6 +67,11 @@
"update_all_flows_async",
"setup_all_flows",
"drop_all_flows",
# Validation
"NamingError",
"validate_field_name",
"validate_flow_name",
"validate_target_name",
# Lib
"init",
"start_server",
Expand Down
11 changes: 7 additions & 4 deletions python/cocoindex/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import inspect
import re

from .validation import validate_flow_name, NamingError

from dataclasses import dataclass
from enum import Enum
from threading import Lock
Expand Down Expand Up @@ -832,10 +834,11 @@ def get_flow_full_name(name: str) -> str:

def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
"""Add a flow definition to the cocoindex library."""
if not all(c.isalnum() or c == "_" for c in name):
raise ValueError(
f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed."
)
try:
validate_flow_name(name)
except NamingError as e:
raise ValueError(str(e)) from e

with _flows_lock:
if name in _flows:
raise KeyError(f"Flow with name {name} already exists")
Expand Down
134 changes: 134 additions & 0 deletions python/cocoindex/tests/test_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Tests for naming validation functionality."""

import pytest
from cocoindex.validation import (
validate_field_name,
validate_flow_name,
validate_full_flow_name,
validate_app_namespace_name,
validate_target_name,
NamingError,
validate_identifier_name,
)


class TestValidateIdentifierName:
    """Exercise ``validate_identifier_name``, which returns an error string or None."""

    def test_valid_names(self) -> None:
        """Well-formed identifiers without dots produce no error message."""
        accepted = (
            "field1",
            "field_name",
            "_private",
            "a",
            "field123",
            "FIELD_NAME",
            "MyField",
            "field_123_test",
        )

        for name in accepted:
            result = validate_identifier_name(name)
            assert result is None, f"Valid name '{name}' failed validation: {result}"

    def test_valid_names_with_dots(self) -> None:
        """Dotted names produce no error message when ``allow_dots=True``."""
        for name in ("app.flow", "my_app.my_flow", "namespace.sub.flow", "a.b.c.d"):
            result = validate_identifier_name(name, allow_dots=True)
            assert result is None, (
                f"Valid dotted name '{name}' failed validation: {result}"
            )

    def test_invalid_starting_characters(self) -> None:
        """Names whose first character is not a letter or underscore are rejected."""
        rejected = (
            "123field",  # leading digit
            ".field",  # leading dot
            "-field",  # leading dash
            " field",  # leading space
        )

        for name in rejected:
            result = validate_identifier_name(name)
            assert result is not None, (
                f"Invalid name '{name}' should have failed validation"
            )

    def test_double_underscore_restriction(self) -> None:
        """Names starting with ``__`` are rejected with a message saying so."""
        for name in ("__reserved", "__internal", "__test"):
            result = validate_identifier_name(name)
            assert result is not None
            assert "double underscores" in result.lower()

    def test_length_restriction(self) -> None:
        """A name one character over ``max_length`` is rejected."""
        limit = 64
        too_long = "a" * (limit + 1)
        result = validate_identifier_name(too_long, max_length=limit)
        assert result is not None
        assert "maximum length" in result.lower()


class TestSpecificValidators:
    """Exercise the per-identifier-type validators, which raise ``NamingError``."""

    def test_valid_field_names(self) -> None:
        """Well-formed field names are accepted without raising."""
        for name in ("field1", "field_name", "_private", "FIELD"):
            validate_field_name(name)  # must not raise

    def test_invalid_field_names(self) -> None:
        """Malformed field names raise ``NamingError``."""
        rejected = ("123field", "field-name", "__reserved", "a" * 65)

        for name in rejected:
            with pytest.raises(NamingError):
                validate_field_name(name)

    def test_flow_validation(self) -> None:
        """Flow names: plain identifiers pass; leading digit or ``__`` prefix fails."""
        for good in ("MyFlow", "my_flow_123"):
            validate_flow_name(good)

        for bad in ("123flow", "__reserved_flow"):
            with pytest.raises(NamingError):
                validate_flow_name(bad)

    def test_full_flow_name_allows_dots(self) -> None:
        """Full flow names accept dots but still reject a leading digit."""
        for good in ("app.my_flow", "namespace.subnamespace.flow"):
            validate_full_flow_name(good)

        # Dots being allowed does not relax the first-character rule.
        with pytest.raises(NamingError):
            validate_full_flow_name("123.invalid")

    def test_target_validation(self) -> None:
        """Target names: plain identifiers pass; a leading digit fails."""
        for good in ("my_target", "output_table"):
            validate_target_name(good)

        with pytest.raises(NamingError):
            validate_target_name("123target")

    def test_app_namespace_validation(self) -> None:
        """App namespace names: plain identifiers pass; dots and leading digits fail."""
        for good in ("myapp", "my_app_123"):
            validate_app_namespace_name(good)

        # "my.app" checks that dots are not allowed; "123app" checks the first character.
        for bad in ("my.app", "123app"):
            with pytest.raises(NamingError):
                validate_app_namespace_name(bad)
101 changes: 101 additions & 0 deletions python/cocoindex/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
Naming validation for CocoIndex identifiers.
This module enforces naming conventions for flow names, field names,
target names, and app namespace names as specified in issue #779.
"""

import re
from typing import Optional


class NamingError(ValueError):
    """Raised when a name violates the naming conventions.

    Subclasses ``ValueError``, so handlers catching ``ValueError`` also
    catch this exception.
    """


def validate_identifier_name(
    name: str,
    max_length: int = 64,
    allow_dots: bool = False,
    identifier_type: str = "identifier",
) -> Optional[str]:
    """
    Validate identifier names according to CocoIndex naming rules.

    Checks run in this order and the first failure is reported: non-empty,
    length, reserved ``__`` prefix, then allowed characters.

    Args:
        name: The name to validate
        max_length: Maximum allowed length (default 64)
        allow_dots: Whether to allow dots in the name (for full flow names)
        identifier_type: Type of identifier for error messages

    Returns:
        None if valid, error message string if invalid
    """
    if not name:
        return f"{identifier_type} name cannot be empty"

    if len(name) > max_length:
        return f"{identifier_type} name '{name}' exceeds maximum length of {max_length} characters"

    if name.startswith("__"):
        return f"{identifier_type} name '{name}' cannot start with double underscores (reserved for internal usage)"

    # Define allowed pattern (unanchored: re.fullmatch below anchors both ends)
    if allow_dots:
        pattern = r"[a-zA-Z_][a-zA-Z0-9_.]*"
        allowed_chars = "letters, digits, underscores, and dots"
    else:
        pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"
        allowed_chars = "letters, digits, and underscores"

    # Use fullmatch rather than match with a "$" anchor: "$" also matches just
    # before a trailing newline, which would let a name like "flow\n" through.
    if not re.fullmatch(pattern, name):
        return f"{identifier_type} name '{name}' must start with a letter or underscore and contain only {allowed_chars}"

    return None


def validate_field_name(name: str) -> None:
    """Check a field name (at most 64 characters, dots not allowed).

    Raises:
        NamingError: with the validator's message if the name is rejected.
    """
    problem = validate_identifier_name(
        name, identifier_type="Field", allow_dots=False, max_length=64
    )
    if not problem:
        return
    raise NamingError(problem)


def validate_flow_name(name: str) -> None:
    """Check a flow name (at most 64 characters, dots not allowed).

    Raises:
        NamingError: with the validator's message if the name is rejected.
    """
    problem = validate_identifier_name(
        name, identifier_type="Flow", allow_dots=False, max_length=64
    )
    if not problem:
        return
    raise NamingError(problem)


def validate_full_flow_name(name: str) -> None:
    """Check a full flow name (at most 64 characters, dots allowed for namespacing).

    Raises:
        NamingError: with the validator's message if the name is rejected.
    """
    problem = validate_identifier_name(
        name, identifier_type="Full flow", allow_dots=True, max_length=64
    )
    if not problem:
        return
    raise NamingError(problem)


def validate_app_namespace_name(name: str) -> None:
    """Check an app namespace name (at most 64 characters, dots not allowed).

    Raises:
        NamingError: with the validator's message if the name is rejected.
    """
    problem = validate_identifier_name(
        name, identifier_type="App namespace", allow_dots=False, max_length=64
    )
    if not problem:
        return
    raise NamingError(problem)


def validate_target_name(name: str) -> None:
    """Check a target name (at most 64 characters, dots not allowed).

    Raises:
        NamingError: with the validator's message if the name is rejected.
    """
    problem = validate_identifier_name(
        name, identifier_type="Target", allow_dots=False, max_length=64
    )
    if not problem:
        return
    raise NamingError(problem)
Loading