Skip to content

Commit 35a27ff

Browse files
committed
Add naming validation for flow identifiers
- Implement validation module for flow names, field names, target names, and app namespace names
- Enforce a 64-character limit and restrict names to letters, digits, and underscores, starting with a letter or underscore
- Prevent double-underscore prefixes (reserved for internal use)
- Add comprehensive test coverage with 11 test cases
- Maintain backward compatibility

Fixes #779
1 parent 0a36a8f commit 35a27ff

File tree

4 files changed

+253
-4
lines changed

4 files changed

+253
-4
lines changed

python/cocoindex/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
from . import functions, sources, targets, cli, utils
66

77
from . import targets as storages # Deprecated: Use targets instead
8+
from .validation import (
9+
NamingError,
10+
validate_field_name,
11+
validate_flow_name,
12+
validate_target_name,
13+
)
814

915
from .auth_registry import AuthEntryReference, add_auth_entry, add_transient_auth_entry
1016
from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
@@ -61,6 +67,11 @@
6167
"update_all_flows_async",
6268
"setup_all_flows",
6369
"drop_all_flows",
70+
# Validation
71+
"NamingError",
72+
"validate_field_name",
73+
"validate_flow_name",
74+
"validate_target_name",
6475
# Lib
6576
"init",
6677
"start_server",

python/cocoindex/flow.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import inspect
1111
import re
1212

13+
from .validation import validate_flow_name, NamingError
14+
1315
from dataclasses import dataclass
1416
from enum import Enum
1517
from threading import Lock
@@ -832,10 +834,11 @@ def get_flow_full_name(name: str) -> str:
832834

833835
def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
834836
"""Add a flow definition to the cocoindex library."""
835-
if not all(c.isalnum() or c == "_" for c in name):
836-
raise ValueError(
837-
f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed."
838-
)
837+
try:
838+
validate_flow_name(name)
839+
except NamingError as e:
840+
raise ValueError(str(e)) from e
841+
839842
with _flows_lock:
840843
if name in _flows:
841844
raise KeyError(f"Flow with name {name} already exists")
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""Tests for naming validation functionality."""
2+
3+
import pytest
4+
from cocoindex.validation import (
5+
validate_field_name,
6+
validate_flow_name,
7+
validate_full_flow_name,
8+
validate_app_namespace_name,
9+
validate_target_name,
10+
NamingError,
11+
validate_identifier_name,
12+
)
13+
14+
15+
class TestValidateIdentifierName:
    """Exercise ``validate_identifier_name``, which returns ``None`` or an error message."""

    def test_valid_names(self) -> None:
        """Plain identifiers are accepted with the default options."""
        accepted = (
            "field1",
            "field_name",
            "_private",
            "a",
            "field123",
            "FIELD_NAME",
            "MyField",
            "field_123_test",
        )

        for name in accepted:
            result = validate_identifier_name(name)
            assert result is None, f"Valid name '{name}' failed validation: {result}"

    def test_valid_names_with_dots(self) -> None:
        """Dotted names are accepted once ``allow_dots=True`` is passed."""
        accepted = ("app.flow", "my_app.my_flow", "namespace.sub.flow", "a.b.c.d")

        for name in accepted:
            result = validate_identifier_name(name, allow_dots=True)
            assert result is None, (
                f"Valid dotted name '{name}' failed validation: {result}"
            )

    def test_invalid_starting_characters(self) -> None:
        """A name whose first character is not a letter or underscore is rejected."""
        rejected = (
            "123field",  # leading digit
            ".field",  # leading dot
            "-field",  # leading dash
            " field",  # leading space
        )

        for name in rejected:
            result = validate_identifier_name(name)
            assert result is not None, (
                f"Invalid name '{name}' should have failed validation"
            )

    def test_double_underscore_restriction(self) -> None:
        """A ``__`` prefix is rejected and the message says why."""
        for reserved in ("__reserved", "__internal", "__test"):
            message = validate_identifier_name(reserved)
            assert message is not None
            assert "double underscores" in message.lower()

    def test_length_restriction(self) -> None:
        """One character over ``max_length`` is rejected with a length message."""
        too_long = "a" * 65
        message = validate_identifier_name(too_long, max_length=64)
        assert message is not None
        assert "maximum length" in message.lower()
76+
77+
class TestSpecificValidators:
    """Exercise the per-identifier wrappers, which raise ``NamingError`` on rejection."""

    def test_valid_field_names(self) -> None:
        """Well-formed field names pass without raising."""
        for accepted in ("field1", "field_name", "_private", "FIELD"):
            validate_field_name(accepted)  # must return quietly

    def test_invalid_field_names(self) -> None:
        """Each kind of malformed field name raises ``NamingError``."""
        rejected = ("123field", "field-name", "__reserved", "a" * 65)

        for bad_name in rejected:
            with pytest.raises(NamingError):
                validate_field_name(bad_name)

    def test_flow_validation(self) -> None:
        """Flow names follow the same identifier rules."""
        # Accepted
        for accepted in ("MyFlow", "my_flow_123"):
            validate_flow_name(accepted)

        # Rejected: leading digit
        with pytest.raises(NamingError):
            validate_flow_name("123flow")

        # Rejected: reserved prefix
        with pytest.raises(NamingError):
            validate_flow_name("__reserved_flow")

    def test_full_flow_name_allows_dots(self) -> None:
        """Full flow names may contain dots but must still start correctly."""
        for accepted in ("app.my_flow", "namespace.subnamespace.flow"):
            validate_full_flow_name(accepted)

        # Dots being allowed does not relax the first-character rule
        with pytest.raises(NamingError):
            validate_full_flow_name("123.invalid")

    def test_target_validation(self) -> None:
        """Target names follow the identifier rules."""
        for accepted in ("my_target", "output_table"):
            validate_target_name(accepted)

        with pytest.raises(NamingError):
            validate_target_name("123target")

    def test_app_namespace_validation(self) -> None:
        """App namespaces follow the identifier rules and may not contain dots."""
        for accepted in ("myapp", "my_app_123"):
            validate_app_namespace_name(accepted)

        # A dot is not permitted inside an app namespace
        with pytest.raises(NamingError):
            validate_app_namespace_name("my.app")

        with pytest.raises(NamingError):
            validate_app_namespace_name("123app")

python/cocoindex/validation.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
"""
2+
Naming validation for CocoIndex identifiers.
3+
4+
This module enforces naming conventions for flow names, field names,
5+
target names, and app namespace names as specified in issue #779.
6+
"""
7+
8+
import re
9+
from typing import Optional
10+
11+
12+
class NamingError(ValueError):
    """Raised when a name violates the naming conventions.

    Subclasses ``ValueError``, so callers that catch ``ValueError`` also
    catch this exception.
    """
16+
17+
18+
def validate_identifier_name(
    name: str,
    max_length: int = 64,
    allow_dots: bool = False,
    identifier_type: str = "identifier",
) -> Optional[str]:
    """
    Validate identifier names according to CocoIndex naming rules.

    Checks run in this order and the first failure is reported: the name is
    non-empty, fits within ``max_length``, does not start with ``__``, starts
    with a letter or underscore and uses only allowed characters, and (when
    dots are allowed) has no empty dot-separated segment.

    Args:
        name: The name to validate
        max_length: Maximum allowed length (default 64)
        allow_dots: Whether to allow dots in the name (for full flow names)
        identifier_type: Type of identifier for error messages

    Returns:
        None if valid, error message string if invalid
    """
    if not name:
        return f"{identifier_type} name cannot be empty"

    if len(name) > max_length:
        return f"{identifier_type} name '{name}' exceeds maximum length of {max_length} characters"

    if name.startswith("__"):
        return f"{identifier_type} name '{name}' cannot start with double underscores (reserved for internal usage)"

    # Define allowed pattern
    if allow_dots:
        pattern = r"[a-zA-Z_][a-zA-Z0-9_.]*"
        allowed_chars = "letters, digits, underscores, and dots"
    else:
        pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"
        allowed_chars = "letters, digits, and underscores"

    # fullmatch rather than match with a `$` anchor: `$` also matches just
    # before a trailing newline, which would let "name\n" through.
    if re.fullmatch(pattern, name) is None:
        return f"{identifier_type} name '{name}' must start with a letter or underscore and contain only {allowed_chars}"

    # A dotted name is a sequence of segments; "a..b" or "a." would contain
    # an empty segment, which the character pattern alone does not catch.
    if allow_dots and "" in name.split("."):
        return f"{identifier_type} name '{name}' cannot contain empty segments between dots"

    return None
57+
58+
59+
def validate_field_name(name: str) -> None:
    """Check ``name`` against the field naming rules (64 characters max, no dots).

    Raises:
        NamingError: If the name is rejected; the message describes the violation.
    """
    problem = validate_identifier_name(
        name,
        identifier_type="Field",
        allow_dots=False,
        max_length=64,
    )
    if problem:
        raise NamingError(problem)
66+
67+
68+
def validate_flow_name(name: str) -> None:
    """Check ``name`` against the flow naming rules (64 characters max, no dots).

    Raises:
        NamingError: If the name is rejected; the message describes the violation.
    """
    problem = validate_identifier_name(
        name,
        identifier_type="Flow",
        allow_dots=False,
        max_length=64,
    )
    if problem:
        raise NamingError(problem)
75+
76+
77+
def validate_full_flow_name(name: str) -> None:
    """Check a full flow name, which may contain dots (64 characters max).

    Raises:
        NamingError: If the name is rejected; the message describes the violation.
    """
    problem = validate_identifier_name(
        name,
        identifier_type="Full flow",
        allow_dots=True,
        max_length=64,
    )
    if problem:
        raise NamingError(problem)
84+
85+
86+
def validate_app_namespace_name(name: str) -> None:
    """Check ``name`` against the app namespace rules (64 characters max, no dots).

    Raises:
        NamingError: If the name is rejected; the message describes the violation.
    """
    problem = validate_identifier_name(
        name,
        identifier_type="App namespace",
        allow_dots=False,
        max_length=64,
    )
    if problem:
        raise NamingError(problem)
93+
94+
95+
def validate_target_name(name: str) -> None:
    """Check ``name`` against the target naming rules (64 characters max, no dots).

    Raises:
        NamingError: If the name is rejected; the message describes the violation.
    """
    problem = validate_identifier_name(
        name,
        identifier_type="Target",
        allow_dots=False,
        max_length=64,
    )
    if problem:
        raise NamingError(problem)

0 commit comments

Comments
 (0)