Skip to content

Commit cda4e5d

Browse files
author
Ilyas Gasanov
committed
Review
1 parent 8435484 commit cda4e5d

File tree

6 files changed

+42
-43
lines changed

6 files changed

+42
-43
lines changed

.env.docker

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ TEST_SFTP_PORT_FOR_CONFTEST=2222
113113
TEST_SFTP_HOST_FOR_WORKER=test-sftp
114114
TEST_SFTP_PORT_FOR_WORKER=2222
115115
TEST_SFTP_USER=syncmaster
116-
TEST_SFTP_PASSWORD=AesujeifohgoaCu0Boosiet5aimeitho
116+
TEST_SFTP_PASSWORD=test_only
117117

118118
SPARK_CONF_DIR=/app/tests/spark/hive/conf/
119119
HADOOP_CONF_DIR=/app/tests/spark/hadoop/

.env.local

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ export TEST_SFTP_PORT_FOR_CONFTEST=2222
100100
export TEST_SFTP_HOST_FOR_WORKER=test-sftp
101101
export TEST_SFTP_PORT_FOR_WORKER=2222
102102
export TEST_SFTP_USER=syncmaster
103-
export TEST_SFTP_PASSWORD=AesujeifohgoaCu0Boosiet5aimeitho
103+
export TEST_SFTP_PASSWORD=test_only
104104

105105
export SPARK_CONF_DIR=./tests/spark/hive/conf/
106106
export HADOOP_CONF_DIR=./tests/spark/hadoop/

docker-compose.test.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -277,7 +277,7 @@ services:
277277
USER_NAME: syncmaster
278278
PASSWORD_ACCESS: true
279279
SUDO_ACCESS: true
280-
USER_PASSWORD: AesujeifohgoaCu0Boosiet5aimeitho
280+
USER_PASSWORD: test_only
281281
profiles: [sftp, all]
282282

283283
volumes:

syncmaster/worker/handlers/file/sftp.py

Lines changed: 34 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
from __future__ import annotations
55

6+
import tempfile
67
from typing import TYPE_CHECKING
78

89
from onetl.connection import SFTP, SparkLocalFS
@@ -24,7 +25,7 @@ def connect(self, spark: SparkSession) -> None:
2425
port=self.connection_dto.port,
2526
user=self.connection_dto.user,
2627
password=self.connection_dto.password,
27-
compress=False,
28+
compress=False, # to avoid errors from combining file and SCP-level compression
2829
).check()
2930
self.local_connection = SparkLocalFS(
3031
spark=spark,
@@ -33,23 +34,23 @@ def connect(self, spark: SparkSession) -> None:
3334
def read(self) -> DataFrame:
3435
from pyspark.sql.types import StructType
3536

36-
downloader = FileDownloader(
37-
connection=self.connection,
38-
source_path=self.transfer_dto.directory_path,
39-
temp_path="/tmp/syncmaster",
40-
local_path="/tmp/syncmaster/sftp",
41-
options={"if_exists": "replace_entire_directory"},
42-
)
43-
downloader.run()
44-
45-
reader = FileDFReader(
46-
connection=self.local_connection,
47-
format=self.transfer_dto.file_format,
48-
source_path="/tmp/syncmaster/sftp",
49-
df_schema=StructType.fromJson(self.transfer_dto.df_schema) if self.transfer_dto.df_schema else None,
50-
options=self.transfer_dto.options,
51-
)
52-
df = reader.run()
37+
with tempfile.TemporaryDirectory(prefix="syncmaster_sftp_") as temp_dir:
38+
downloader = FileDownloader(
39+
connection=self.connection,
40+
source_path=self.transfer_dto.directory_path,
41+
local_path=temp_dir,
42+
)
43+
downloader.run()
44+
45+
reader = FileDFReader(
46+
connection=self.local_connection,
47+
format=self.transfer_dto.file_format,
48+
source_path=temp_dir,
49+
df_schema=StructType.fromJson(self.transfer_dto.df_schema) if self.transfer_dto.df_schema else None,
50+
options=self.transfer_dto.options,
51+
)
52+
df = reader.run()
53+
df.cache().count()
5354

5455
rows_filter_expression = self._get_rows_filter_expression()
5556
if rows_filter_expression:
@@ -62,19 +63,18 @@ def read(self) -> DataFrame:
6263
return df
6364

6465
def write(self, df: DataFrame) -> None:
65-
writer = FileDFWriter(
66-
connection=self.local_connection,
67-
format=self.transfer_dto.file_format,
68-
target_path="/tmp/syncmaster/sftp",
69-
options=self.transfer_dto.options,
70-
)
71-
writer.run(df=df)
72-
73-
uploader = FileUploader(
74-
connection=self.connection,
75-
local_path="/tmp/syncmaster/sftp",
76-
temp_path="/config/target", # SFTP host
77-
target_path=self.transfer_dto.directory_path,
78-
options={"if_exists": "replace_entire_directory"},
79-
)
80-
uploader.run()
66+
with tempfile.TemporaryDirectory(prefix="syncmaster_sftp_") as temp_dir:
67+
writer = FileDFWriter(
68+
connection=self.local_connection,
69+
format=self.transfer_dto.file_format,
70+
target_path=temp_dir,
71+
options=self.transfer_dto.options,
72+
)
73+
writer.run(df=df)
74+
75+
uploader = FileUploader(
76+
connection=self.connection,
77+
local_path=temp_dir,
78+
target_path=self.transfer_dto.directory_path,
79+
)
80+
uploader.run()

tests/test_integration/test_run_transfer/connection_fixtures/sftp_fixtures.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ def sftp_file_connection(sftp_for_conftest):
8484
port=sftp_for_conftest.port,
8585
user=sftp_for_conftest.user,
8686
password=sftp_for_conftest.password,
87-
compress=False,
87+
compress=False, # to avoid errors from combining file and SCP-level compression
8888
)
8989

9090

tests/test_integration/test_run_transfer/test_sftp.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import os
22
import secrets
3+
from pathlib import Path
34

45
import pytest
56
import pytest_asyncio
@@ -246,6 +247,7 @@ async def test_run_transfer_postgres_to_sftp(
246247
postgres_to_sftp: Transfer,
247248
target_file_format,
248249
file_format_flavor: str,
250+
tmp_path: Path,
249251
):
250252
format_name, format = target_file_format
251253

@@ -279,18 +281,15 @@ async def test_run_transfer_postgres_to_sftp(
279281
downloader = FileDownloader(
280282
connection=sftp_file_connection,
281283
source_path=f"/config/target/{format_name}/{file_format_flavor}",
282-
temp_path="/tmp/syncmaster",
283-
local_path="/tmp/syncmaster/sftp",
284-
options={"if_exists": "replace_entire_directory"},
284+
local_path=tmp_path,
285285
)
286286
downloader.run()
287287

288288
reader = FileDFReader(
289289
connection=sftp_file_df_connection,
290290
format=format,
291-
source_path="/tmp/syncmaster/sftp",
291+
source_path=tmp_path,
292292
df_schema=init_df.schema,
293-
options={},
294293
)
295294
df = reader.run()
296295

0 commit comments

Comments
 (0)