
Commit bb60d04

Author: Ilyas Gasanov (committed)
Commit message: tmp
1 parent: 407f230

File tree

5 files changed: +203 -23 lines


poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 4 additions & 2 deletions

@@ -59,7 +59,8 @@ pyjwt = { version = "^2.10.1", optional = true }
 jinja2 = { version = "^3.1.4", optional = true }
 python-multipart = { version = ">=0.0.9,<0.0.21", optional = true }
 celery = { version = "^5.4.0", optional = true }
-onetl = { git = "https://github.com/MobileTeleSystems/onetl.git", rev = "develop", extras = ["spark"] }
+# TODO: switch to 0.13.0 before next SyncMaster release
+onetl = { git = "https://github.com/MobileTeleSystems/onetl.git", branch = "develop", extras = ["spark"] }
 pyyaml = {version = "*", optional = true}
 # due to not supporting MacOS 14.x https://www.psycopg.org/psycopg3/docs/news.html#psycopg-3-1-20
 psycopg = { version = ">=3.1.0,<3.2.5", extras = ["binary"], optional = true }
@@ -132,7 +133,8 @@ pytest-randomly = "^3.15.0"
 pytest-deadfixtures = "^2.2.1"
 pytest-mock = "^3.14.0"
 pytest-lazy-fixtures = "^1.1.1"
-onetl = { git = "https://github.com/MobileTeleSystems/onetl.git", rev = "develop", extras = ["spark", "s3", "hdfs", "files"] }
+# TODO: switch to 0.13.0 before next SyncMaster release
+onetl = { git = "https://github.com/MobileTeleSystems/onetl.git", branch = "develop", extras = ["spark", "s3", "hdfs", "files"] }
 faker = ">=33.3,<36.0"
 coverage = "^7.6.1"
 gevent = "^24.2.1"

syncmaster/schemas/v1/transfers/transformations/file_metadata_filter.py

Lines changed: 25 additions & 7 deletions

@@ -1,30 +1,48 @@
 # SPDX-FileCopyrightText: 2023-2024 MTS PJSC
 # SPDX-License-Identifier: Apache-2.0
+import glob
+import re
 from typing import Annotated, Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ByteSize, Field, field_validator
 
 from syncmaster.schemas.v1.transformation_types import FILE_METADATA_FILTER
 
 
-class BaseMetadataFilter(BaseModel):
+class NameGlobFilter(BaseModel):
+    type: Literal["name_glob"]
     value: str
 
+    @field_validator("value", mode="before")
+    def _validate_pattern(cls, value: str) -> str:
+        if not glob.has_magic(value):
+            raise ValueError(f"Invalid glob: {value!r}")
 
-class NameGlobFilter(BaseMetadataFilter):
-    type: Literal["name_glob"]
+        return value
 
 
-class NameRegexpFilter(BaseMetadataFilter):
+class NameRegexpFilter(BaseModel):
     type: Literal["name_regexp"]
+    value: str
+
+    @field_validator("value", mode="before")
+    def _validate_pattern(cls, value: str) -> str:
+        try:
+            re.compile(value)
+        except re.error as e:
+            raise ValueError(f"Invalid regexp: {value!r}") from e
+
+        return value
 
 
-class FileSizeMinFilter(BaseMetadataFilter):
+class FileSizeMinFilter(BaseModel):
     type: Literal["file_size_min"]
+    value: ByteSize
 
 
-class FileSizeMaxFilter(BaseMetadataFilter):
+class FileSizeMaxFilter(BaseModel):
     type: Literal["file_size_max"]
+    value: ByteSize
 
 
 MetadataFilter = NameGlobFilter | NameRegexpFilter | FileSizeMinFilter | FileSizeMaxFilter
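For reference, a minimal, self-contained sketch of how two of these new models behave in isolation. The model bodies are copied from the diff above; the expected outputs in the comments are assumptions based on pydantic v2's ByteSize parsing and error formatting, not output captured from SyncMaster itself.

import glob
from typing import Literal

from pydantic import BaseModel, ByteSize, ValidationError, field_validator


class NameGlobFilter(BaseModel):
    type: Literal["name_glob"]
    value: str

    @field_validator("value", mode="before")
    def _validate_pattern(cls, value: str) -> str:
        # glob.has_magic() detects *, ? and [...] wildcards
        if not glob.has_magic(value):
            raise ValueError(f"Invalid glob: {value!r}")
        return value


class FileSizeMinFilter(BaseModel):
    type: Literal["file_size_min"]
    value: ByteSize


# "*.csv" contains a wildcard, so the validator accepts it.
NameGlobFilter(type="name_glob", value="*.csv")

# ".csv" has no wildcard, so validation fails.
try:
    NameGlobFilter(type="name_glob", value=".csv")
except ValidationError as e:
    print(e.errors()[0]["msg"])  # Value error, Invalid glob: '.csv'

# ByteSize parses human-readable sizes: "1kb" -> 1000 bytes.
print(FileSizeMinFilter(type="file_size_min", value="1kb").value)  # 1000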

tests/test_unit/test_transfers/test_create_transfer.py

Lines changed: 151 additions & 4 deletions

@@ -9,6 +9,7 @@
 pytestmark = [pytest.mark.asyncio, pytest.mark.server]
 
 
+@pytest.mark.parametrize("connection_type", ["ftp"], indirect=True)
 async def test_developer_plus_can_create_transfer(
     client: AsyncClient,
     two_group_connections: tuple[MockConnection, MockConnection],
@@ -33,8 +34,20 @@ async def test_developer_plus_can_create_transfer(
         "schedule": "",
         "source_connection_id": first_connection.id,
         "target_connection_id": second_connection.id,
-        "source_params": {"type": "postgres", "table_name": "source_table"},
-        "target_params": {"type": "postgres", "table_name": "target_table"},
+        "source_params": {
+            "type": "ftp",
+            "directory_path": "/source_path",
+            "file_format": {
+                "type": "csv",
+            },
+        },
+        "target_params": {
+            "type": "ftp",
+            "directory_path": "/target_path",
+            "file_format": {
+                "type": "csv",
+            },
+        },
         "strategy_params": {"type": "full"},
         "transformations": [
             {
@@ -71,6 +84,23 @@ async def test_developer_plus_can_create_transfer(
                     },
                 ],
            },
+            {
+                "type": "file_metadata_filter",
+                "filters": [
+                    {
+                        "type": "name_glob",
+                        "value": "*.csv",
+                    },
+                    {
+                        "type": "name_regexp",
+                        "value": "^1234$",
+                    },
+                    {
+                        "type": "file_size_min",
+                        "value": "1kb",
+                    },
+                ],
+            },
         ],
         "queue_id": group_queue.id,
     },
@@ -466,12 +496,12 @@ async def test_superuser_can_create_transfer(
                     "location": ["body", "transformations", 0],
                     "message": (
                         "Input tag 'some unknown transformation type' found using 'type' "
-                        "does not match any of the expected tags: 'dataframe_rows_filter', 'dataframe_columns_filter'"
+                        "does not match any of the expected tags: 'dataframe_rows_filter', 'dataframe_columns_filter', 'file_metadata_filter'"
                     ),
                     "code": "union_tag_invalid",
                     "context": {
                         "discriminator": "'type'",
-                        "expected_tags": "'dataframe_rows_filter', 'dataframe_columns_filter'",
+                        "expected_tags": "'dataframe_rows_filter', 'dataframe_columns_filter', 'file_metadata_filter'",
                         "tag": "some unknown transformation type",
                     },
                     "input": {
@@ -572,6 +602,123 @@ async def test_superuser_can_create_transfer(
                 },
             },
         ),
+        (
+            {
+                "transformations": [
+                    {
+                        "type": "file_metadata_filter",
+                        "filters": [
+                            {
+                                "type": "glob",
+                                "value": "*.csv",
+                            },
+                        ],
+                    },
+                ],
+            },
+            {
+                "error": {
+                    "code": "invalid_request",
+                    "message": "Invalid request",
+                    "details": [
+                        {
+                            "location": ["body", "transformations", 0, "file_metadata_filter", "filters", 0],
+                            "message": (
+                                "Input tag 'glob' found using 'type' does not match any of the expected tags: 'name_glob', 'name_regexp', 'file_size_min', 'file_size_max'"
+                            ),
+                            "code": "union_tag_invalid",
+                            "context": {
+                                "discriminator": "'type'",
+                                "tag": "glob",
+                                "expected_tags": "'name_glob', 'name_regexp', 'file_size_min', 'file_size_max'",
+                            },
+                            "input": {
+                                "type": "glob",
+                                "value": "*.csv",
+                            },
+                        },
+                    ],
+                },
+            },
+        ),
+        (
+            {
+                "transformations": [
+                    {
+                        "type": "file_metadata_filter",
+                        "filters": [
+                            {
+                                "type": "name_glob",
+                                "value": ".csv",
+                            },
+                        ],
+                    },
+                ],
+            },
+            {
+                "error": {
+                    "code": "invalid_request",
+                    "message": "Invalid request",
+                    "details": [
+                        {
+                            "location": [
+                                "body",
+                                "transformations",
+                                0,
+                                "file_metadata_filter",
+                                "filters",
+                                0,
+                                "name_glob",
+                                "value",
+                            ],
+                            "message": "Value error, Invalid glob: '.csv'",
+                            "code": "value_error",
+                            "context": {},
+                            "input": ".csv",
+                        },
+                    ],
+                },
+            },
+        ),
+        (
+            {
+                "transformations": [
+                    {
+                        "type": "file_metadata_filter",
+                        "filters": [
+                            {
+                                "type": "name_regexp",
+                                "value": "[a-z",
+                            },
+                        ],
+                    },
+                ],
+            },
+            {
+                "error": {
+                    "code": "invalid_request",
+                    "message": "Invalid request",
+                    "details": [
+                        {
+                            "location": [
+                                "body",
+                                "transformations",
+                                0,
+                                "file_metadata_filter",
+                                "filters",
+                                0,
+                                "name_regexp",
+                                "value",
+                            ],
+                            "message": "Value error, Invalid regexp: '[a-z'",
+                            "code": "value_error",
+                            "context": {},
+                            "input": "[a-z",
+                        },
+                    ],
+                },
+            },
+        ),
     ),
 )
 async def test_check_fields_validation_on_create_transfer(
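The union_tag_invalid payloads asserted above come from pydantic's tagged-union dispatch on the "type" field. Below is a minimal sketch that reproduces the same error shape, assuming only two of the four filter members and validating through a bare TypeAdapter rather than SyncMaster's actual request schema (which is not shown in this diff):

from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter, ValidationError


class NameGlobFilter(BaseModel):
    type: Literal["name_glob"]
    value: str


class NameRegexpFilter(BaseModel):
    type: Literal["name_regexp"]
    value: str


# Discriminated union keyed on "type"; pydantic reports "union_tag_invalid"
# when the incoming tag matches none of the union members.
Filter = Annotated[NameGlobFilter | NameRegexpFilter, Field(discriminator="type")]

try:
    TypeAdapter(Filter).validate_python({"type": "glob", "value": "*.csv"})
except ValidationError as e:
    error = e.errors()[0]
    print(error["type"])        # union_tag_invalid
    print(error["ctx"]["tag"])  # glob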

tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py

Lines changed: 22 additions & 9 deletions

@@ -11,7 +11,7 @@
 [
     {
         "source_and_target_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/pure/path",
             "file_format": {
                 "delimiter": ",",
@@ -31,7 +31,7 @@
     },
     {
         "source_and_target_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/excel/path",
             "file_format": {
                 "type": "excel",
@@ -46,7 +46,7 @@
     },
     {
         "source_and_target_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/xml/path",
             "file_format": {
                 "type": "xml",
@@ -62,7 +62,7 @@
     },
     {
         "source_and_target_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/orc/path",
             "file_format": {
                 "type": "orc",
@@ -73,7 +73,7 @@
     },
     {
         "source_and_target_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/parquet/path",
             "file_format": {
                 "type": "parquet",
@@ -106,6 +106,19 @@
                 },
             ],
         },
+        {
+            "type": "file_metadata_filter",
+            "filters": [
+                {
+                    "type": "name_glob",
+                    "value": "*.csv",
+                },
+                {
+                    "type": "name_regexp",
+                    "value": "^1234$",
+                },
+            ],
+        },
     ],
     },
 ],
@@ -114,10 +127,10 @@
 "connection_type,create_connection_data",
 [
     (
-        "s3",
+        "ftp",
         {
             "host": "localhost",
-            "port": 443,
+            "port": 80,
         },
     ),
 ],
@@ -150,13 +163,13 @@ async def test_developer_plus_can_update_s3_transfer(
     headers={"Authorization": f"Bearer {user.token}"},
     json={
         "source_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/new/test/directory",
             "file_format": create_transfer_data["source_and_target_params"]["file_format"],
             "options": {"some": "option"},
         },
         "target_params": {
-            "type": "s3",
+            "type": "ftp",
             "directory_path": "/some/new/test/directory",
             "file_format": create_transfer_data["source_and_target_params"]["file_format"],
             "file_name_template": "{index}.{extension}",
