Skip to content

Commit 84aff5e

Browse files
authored
fix: Allow less restrictive values for parameters in Pipeline configurations (#3345)
* fix: Allow arbitrary values for parameters in Pipeline configurations
* Add test
* Adapt expected error message in tests
* Fix bug
* Fix bug on checking JSON
* Remove test cases that previously tested if error was thrown
* Change encoding in test
* Restrict possible values
* Re-add tests
* Re-add tests
* Add value flag to list elements
1 parent 797c20c commit 84aff5e

File tree

2 files changed

+46
-6
lines changed

2 files changed

+46
-6
lines changed

haystack/pipelines/config.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
logger = logging.getLogger(__name__)
2222

2323

24-
VALID_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/\\.:*]+$")
24+
VALID_KEY_REGEX = re.compile(r"^[-\w/\\.:*]+$")
25+
VALID_VALUE_REGEX = re.compile(r"^[-\w/\\.:* \[\]]+$")
2526
VALID_ROOT_NODES = ["Query", "File"]
2627

2728

@@ -100,15 +101,14 @@ def read_pipeline_config_from_yaml(path: Path) -> Dict[str, Any]:
100101
JSON_FIELDS = ["custom_query"] # ElasticsearchDocumentStore.custom_query
101102

102103

103-
def validate_config_strings(pipeline_config: Any):
104+
def validate_config_strings(pipeline_config: Any, is_value: bool = False):
104105
"""
105106
Ensures that strings used in the pipelines configuration
106107
contain only alphanumeric characters and basic punctuation.
107108
"""
108109
try:
109110
if isinstance(pipeline_config, dict):
110111
for key, value in pipeline_config.items():
111-
112112
# FIXME find a better solution
113113
# Some nodes take parameters that expect JSON input,
114114
# like `ElasticsearchDocumentStore.custom_query`
@@ -125,14 +125,15 @@ def validate_config_strings(pipeline_config: Any):
125125
raise PipelineConfigError(f"'{pipeline_config}' does not contain valid JSON.")
126126
else:
127127
validate_config_strings(key)
128-
validate_config_strings(value)
128+
validate_config_strings(value, is_value=True)
129129

130130
elif isinstance(pipeline_config, list):
131131
for value in pipeline_config:
132-
validate_config_strings(value)
132+
validate_config_strings(value, is_value=True)
133133

134134
else:
135-
if not VALID_INPUT_REGEX.match(str(pipeline_config)):
135+
valid_regex = VALID_VALUE_REGEX if is_value else VALID_KEY_REGEX
136+
if not valid_regex.match(str(pipeline_config)):
136137
raise PipelineConfigError(
137138
f"'{pipeline_config}' is not a valid variable name or value. "
138139
"Use alphanumeric characters or dash, underscore and colon only."

test/pipelines/test_pipeline_yaml.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,45 @@ def test_load_yaml_disconnected_component(tmp_path):
10291029
assert not pipeline.get_node("retriever")
10301030

10311031

1032+
def test_load_yaml_unusual_chars_in_values(tmp_path):
1033+
class DummyNode(BaseComponent):
1034+
outgoing_edges = 1
1035+
1036+
def __init__(self, space_param, non_alphanumeric_param):
1037+
super().__init__()
1038+
self.space_param = space_param
1039+
self.non_alphanumeric_param = non_alphanumeric_param
1040+
1041+
def run(self):
1042+
raise NotImplementedError
1043+
1044+
def run_batch(self):
1045+
raise NotImplementedError
1046+
1047+
with open(tmp_path / "tmp_config.yml", "w", encoding="utf-8") as tmp_file:
1048+
tmp_file.write(
1049+
f"""
1050+
version: '1.9.0'
1051+
1052+
components:
1053+
- name: DummyNode
1054+
type: DummyNode
1055+
params:
1056+
space_param: with space
1057+
non_alphanumeric_param: \[ümlaut\]
1058+
1059+
pipelines:
1060+
- name: indexing
1061+
nodes:
1062+
- name: DummyNode
1063+
inputs: [File]
1064+
"""
1065+
)
1066+
pipeline = Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
1067+
assert pipeline.components["DummyNode"].space_param == "with space"
1068+
assert pipeline.components["DummyNode"].non_alphanumeric_param == "\\[ümlaut\\]"
1069+
1070+
10321071
def test_save_yaml(tmp_path):
10331072
pipeline = Pipeline()
10341073
pipeline.add_node(MockRetriever(), name="retriever", inputs=["Query"])

0 commit comments

Comments
 (0)