Skip to content

Commit 2ea5fa5

Browse files
authored
[SYNPY-1575] Correct regular expression for invalid column name (#1187)
* Correct regular expression for invalid column name
1 parent 8a9ad21 commit 2ea5fa5

File tree

3 files changed

+122
-18
lines changed

3 files changed

+122
-18
lines changed

synapseclient/models/mixins/table_components.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,9 +402,9 @@ async def store_async(
402402
await self._append_default_columns(synapse_client=synapse_client)
403403

404404
if self.columns:
405-
# check that column names match this regex "^[a-zA-Z0-9,_.]+"
405+
# check that column names match this regex "^[a-zA-Z0-9 _\-\.\+\(\)']+$"
406406
for _, column in self.columns.items():
407-
if not re.match(r"^[a-zA-Z0-9,_.]+$", column.name):
407+
if not re.match(r"^[a-zA-Z0-9 _\-\.\+\(\)']+$", column.name):
408408
raise ValueError(
409409
f"Column name '{column.name}' contains invalid characters. "
410410
"Names may only contain: letters, numbers, spaces, underscores, "
@@ -3513,7 +3513,9 @@ async def _chunk_and_upload_df(
35133513
)
35143514
progress_bar = tqdm(
35153515
total=total_df_bytes,
3516-
desc="Splitting DataFrame and uploading chunks",
3516+
desc="Splitting DataFrame and uploading chunks"
3517+
if len(chunks_to_upload) > 1
3518+
else "Uploading DataFrame",
35173519
unit_scale=True,
35183520
smoothing=0,
35193521
unit="B",

tests/integration/synapseclient/models/async/test_table_async.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,67 @@ async def test_create_table_with_multiple_columns(
128128
new_table_instance.columns["test_column2"].column_type == ColumnType.INTEGER
129129
)
130130

131+
async def test_create_table_with_many_columns(self, project_model: Project) -> None:
132+
# GIVEN a table with many columns
133+
table_name = str(uuid.uuid4())
134+
table_description = "Test table"
135+
table = Table(
136+
name=table_name,
137+
parent_id=project_model.id,
138+
description=table_description,
139+
columns=[
140+
Column(name="col1", column_type=ColumnType.STRING, id="id1"),
141+
Column(name="col 2", column_type=ColumnType.STRING, id="id2"),
142+
Column(name="col_3", column_type=ColumnType.STRING, id="id3"),
143+
Column(name="col-4", column_type=ColumnType.STRING, id="id4"),
144+
Column(name="col.5", column_type=ColumnType.STRING, id="id5"),
145+
Column(name="col+6", column_type=ColumnType.STRING, id="id6"),
146+
Column(name="col'7", column_type=ColumnType.STRING, id="id7"),
147+
Column(name="col(8)", column_type=ColumnType.STRING, id="id8"),
148+
],
149+
)
150+
151+
# WHEN I store the table
152+
table = await table.store_async(synapse_client=self.syn)
153+
self.schedule_for_cleanup(table.id)
154+
155+
# THEN the table should be created
156+
assert table.id is not None
157+
158+
# AND I can retrieve that table from Synapse
159+
new_table_instance = await Table(id=table.id).get_async(
160+
synapse_client=self.syn, include_columns=True
161+
)
162+
assert new_table_instance is not None
163+
assert new_table_instance.name == table_name
164+
assert new_table_instance.id == table.id
165+
assert new_table_instance.description == table_description
166+
167+
# Verify all column names and types
168+
assert new_table_instance.columns["col1"].name == "col1"
169+
assert new_table_instance.columns["col1"].column_type == ColumnType.STRING
170+
171+
assert new_table_instance.columns["col 2"].name == "col 2"
172+
assert new_table_instance.columns["col 2"].column_type == ColumnType.STRING
173+
174+
assert new_table_instance.columns["col_3"].name == "col_3"
175+
assert new_table_instance.columns["col_3"].column_type == ColumnType.STRING
176+
177+
assert new_table_instance.columns["col-4"].name == "col-4"
178+
assert new_table_instance.columns["col-4"].column_type == ColumnType.STRING
179+
180+
assert new_table_instance.columns["col.5"].name == "col.5"
181+
assert new_table_instance.columns["col.5"].column_type == ColumnType.STRING
182+
183+
assert new_table_instance.columns["col+6"].name == "col+6"
184+
assert new_table_instance.columns["col+6"].column_type == ColumnType.STRING
185+
186+
assert new_table_instance.columns["col'7"].name == "col'7"
187+
assert new_table_instance.columns["col'7"].column_type == ColumnType.STRING
188+
189+
assert new_table_instance.columns["col(8)"].name == "col(8)"
190+
assert new_table_instance.columns["col(8)"].column_type == ColumnType.STRING
191+
131192
async def test_create_table_with_invalid_column(
132193
self, project_model: Project
133194
) -> None:

tests/unit/synapseclient/mixins/unit_test_table_components.py

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -614,27 +614,68 @@ async def test_store_async_no_default_columns(self):
614614
# AND the result should be the same instance
615615
assert result == test_instance
616616

617-
async def test_store_async_invalid_character_in_column_name(self):
618-
# GIVEN a TestClass instance with an invalid character in a column name
619-
test_instance = self.ClassForTest(
617+
@pytest.mark.parametrize(
618+
"invalid_column_name",
619+
[
620+
"col*1", # Invalid character: *
621+
"col/1", # Invalid character: /
622+
"col\\1", # Invalid character: \
623+
"col:1", # Invalid character: :
624+
"col;1", # Invalid character: ;
625+
"col,1", # Invalid character: ,
626+
"col?1", # Invalid character: ?
627+
"col!1", # Invalid character: !
628+
"col@1", # Invalid character: @
629+
"col#1", # Invalid character: #
630+
],
631+
)
632+
async def test_store_async_invalid_character_in_column_name(
633+
self, invalid_column_name
634+
):
635+
# GIVEN a TestClass instance with an invalid column name
636+
test_instance = TestViewStoreMixin.ClassForTest(
620637
include_default_columns=False,
621638
columns={
622-
"col*1": Column(name="col*1", column_type=ColumnType.STRING, id="id1")
639+
invalid_column_name: Column(
640+
name=invalid_column_name, column_type=ColumnType.STRING, id="id1"
641+
)
623642
},
624643
)
625644

626-
with patch(GET_ID_PATCH, return_value=None):
627-
# WHEN store_async is awaited
628-
# THEN a ValueError should be raised
629-
with pytest.raises(
630-
ValueError,
631-
match=re.escape(
632-
"Column name 'col*1' contains invalid characters. "
633-
"Names may only contain: letters, numbers, spaces, underscores, "
634-
"hyphens, periods, plus signs, apostrophes, and parentheses."
645+
# WHEN store_async is awaited
646+
# THEN a ValueError should be raised with the appropriate message
647+
with pytest.raises(
648+
ValueError,
649+
match=re.escape(
650+
f"Column name '{invalid_column_name}' contains invalid characters. "
651+
"Names may only contain: letters, numbers, spaces, underscores, "
652+
"hyphens, periods, plus signs, apostrophes, and parentheses."
653+
),
654+
):
655+
await test_instance.store_async(synapse_client=None, dry_run=True)
656+
657+
async def test_store_async_valid_characters_in_column_name(self):
658+
# GIVEN a TestClass instance with valid characters in column names
659+
test_instance = self.ClassForTest(
660+
include_default_columns=False,
661+
columns={
662+
"col1": Column(name="col1", column_type=ColumnType.STRING, id="id1"),
663+
"col 2": Column(name="col 2", column_type=ColumnType.STRING, id="id2"),
664+
"col_3": Column(name="col_3", column_type=ColumnType.STRING, id="id3"),
665+
"col-4": Column(name="col-4", column_type=ColumnType.STRING, id="id4"),
666+
"col.5": Column(name="col.5", column_type=ColumnType.STRING, id="id5"),
667+
"col+6": Column(name="col+6", column_type=ColumnType.STRING, id="id6"),
668+
"col'7": Column(name="col'7", column_type=ColumnType.STRING, id="id7"),
669+
"col(8)": Column(
670+
name="col(8)", column_type=ColumnType.STRING, id="id8"
635671
),
636-
):
637-
await test_instance.store_async(synapse_client=self.syn, dry_run=True)
672+
},
673+
)
674+
675+
# WHEN store_async is awaited
676+
await test_instance.store_async(synapse_client=self.syn, dry_run=True)
677+
678+
# THEN No exception should be raised
638679

639680

640681
class TestDeleteMixin:

0 commit comments

Comments
 (0)