Skip to content

Commit d820def

Browse files
[DOP-22141] Add logic for handling SFTP transfers (#189)
1 parent 85618f1 commit d820def

36 files changed

+2850
-1455
lines changed

.env.docker

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ TEST_HDFS_HOST=test-hive
108108
TEST_HDFS_WEBHDFS_PORT=9870
109109
TEST_HDFS_IPC_PORT=9820
110110

111+
TEST_SFTP_HOST_FOR_CONFTEST=test-sftp
112+
TEST_SFTP_PORT_FOR_CONFTEST=2222
113+
TEST_SFTP_HOST_FOR_WORKER=test-sftp
114+
TEST_SFTP_PORT_FOR_WORKER=2222
115+
TEST_SFTP_USER=syncmaster
116+
TEST_SFTP_PASSWORD=test_only
117+
111118
SPARK_CONF_DIR=/app/tests/spark/hive/conf/
112119
HADOOP_CONF_DIR=/app/tests/spark/hadoop/
113120
HIVE_CONF_DIR=/app/tests/spark/hive/conf/

.env.local

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ export TEST_HDFS_HOST=test-hive
9595
export TEST_HDFS_WEBHDFS_PORT=9870
9696
export TEST_HDFS_IPC_PORT=9820
9797

98+
export TEST_SFTP_HOST_FOR_CONFTEST=localhost
99+
export TEST_SFTP_PORT_FOR_CONFTEST=2222
100+
export TEST_SFTP_HOST_FOR_WORKER=test-sftp
101+
export TEST_SFTP_PORT_FOR_WORKER=2222
102+
export TEST_SFTP_USER=syncmaster
103+
export TEST_SFTP_PASSWORD=test_only
104+
98105
export SPARK_CONF_DIR=./tests/spark/hive/conf/
99106
export HADOOP_CONF_DIR=./tests/spark/hadoop/
100107
export HIVE_CONF_DIR=./tests/spark/hive/conf/

.github/workflows/sftp-tests.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
name: SFTP tests
2+
on:
3+
workflow_call:
4+
5+
env:
6+
DEFAULT_PYTHON: '3.12'
7+
8+
jobs:
9+
test:
10+
name: Run SFTP tests
11+
runs-on: ubuntu-latest
12+
13+
steps:
14+
- name: Checkout code
15+
uses: actions/checkout@v4
16+
17+
- name: Set up QEMU
18+
uses: docker/setup-qemu-action@v3
19+
20+
- name: Set up Docker Buildx
21+
uses: docker/setup-buildx-action@v3
22+
23+
- name: Cache jars
24+
uses: actions/cache@v4
25+
with:
26+
path: ./cached_jars
27+
key: ${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-sftp
28+
restore-keys: |
29+
${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-sftp
30+
${{ runner.os }}-python-
31+
32+
- name: Build Worker Image
33+
uses: docker/build-push-action@v6
34+
with:
35+
context: .
36+
tags: mtsrus/syncmaster-worker:${{ github.sha }}
37+
target: test
38+
file: docker/Dockerfile.worker
39+
load: true
40+
cache-from: mtsrus/syncmaster-worker:develop
41+
42+
- name: Docker compose up
43+
run: |
44+
docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
45+
docker compose -f docker-compose.test.yml --profile sftp up -d --wait --wait-timeout 200
46+
env:
47+
WORKER_IMAGE_TAG: ${{ github.sha }}
48+
49+
- name: Run SFTP Tests
50+
run: |
51+
docker compose -f ./docker-compose.test.yml --profile sftp exec -T worker coverage run -m pytest -vvv -s -m "worker and sftp"
52+
53+
- name: Dump worker logs on failure
54+
if: failure()
55+
uses: jwalton/gh-docker-logs@v2
56+
with:
57+
images: mtsrus/syncmaster-worker
58+
dest: ./logs
59+
60+
# This shutdown step is important: coverage data is exported only after the worker receives SIGTERM
61+
- name: Shutdown
62+
if: always()
63+
run: |
64+
docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
65+
66+
- name: Upload worker logs
67+
uses: actions/upload-artifact@v4
68+
if: failure()
69+
with:
70+
name: worker-logs-sftp
71+
path: logs/*
72+
73+
- name: Upload coverage results
74+
uses: actions/upload-artifact@v4
75+
with:
76+
name: coverage-sftp
77+
path: reports/*
78+
# https://github.com/actions/upload-artifact/issues/602
79+
include-hidden-files: true

.github/workflows/tests.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ jobs:
4444
name: S3 tests
4545
uses: ./.github/workflows/s3-tests.yml
4646

47+
sftp_tests:
48+
name: SFTP tests
49+
uses: ./.github/workflows/sftp-tests.yml
50+
4751
scheduler_tests:
4852
name: Scheduler tests
4953
uses: ./.github/workflows/scheduler-tests.yml
@@ -56,7 +60,7 @@ jobs:
5660
name: Tests done
5761
runs-on: ubuntu-latest
5862

59-
needs: [oracle_tests, clickhouse_tests, mssql_tests, mysql_tests, hive_tests, hdfs_tests, s3_tests, unit_tests]
63+
needs: [unit_tests, scheduler_tests, oracle_tests, clickhouse_tests, mssql_tests, mysql_tests, hive_tests, hdfs_tests, s3_tests, sftp_tests]
6064
steps:
6165
- name: Checkout code
6266
uses: actions/checkout@v4

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ test-integration-s3: test-db ##@Test Run integration tests for S3
109109
docker compose -f docker-compose.test.yml --profile s3 up -d --wait $(DOCKER_COMPOSE_ARGS)
110110
${POETRY} run pytest ./tests/test_integration -m s3 $(PYTEST_ARGS)
111111

112+
test-integration-sftp: test-db ##@Test Run integration tests for SFTP
113+
docker compose -f docker-compose.test.yml --profile sftp up -d --wait $(DOCKER_COMPOSE_ARGS)
114+
${POETRY} run pytest ./tests/test_integration -m sftp $(PYTEST_ARGS)
115+
112116
test-integration: test-db ##@Test Run all integration tests
113117
docker compose -f docker-compose.test.yml --profile all up -d --wait $(DOCKER_COMPOSE_ARGS)
114118
${POETRY} run pytest ./tests/test_integration $(PYTEST_ARGS)

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ List of currently supported connections:
4040
* MySQL
4141
* HDFS
4242
* S3
43+
* SFTP
4344

4445
Current Data.SyncMaster implementation provides following components:
4546

docker-compose.test.yml

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ services:
125125
condition: service_completed_successfully
126126
rabbitmq:
127127
condition: service_healthy
128-
profiles: [worker, scheduler, s3, oracle, hdfs, hive, clickhouse, mysql, mssql, all]
128+
profiles: [worker, scheduler, s3, oracle, hdfs, hive, clickhouse, mysql, mssql, sftp, all]
129129

130130
test-postgres:
131131
image: postgres
@@ -139,7 +139,7 @@ services:
139139
interval: 30s
140140
timeout: 5s
141141
retries: 3
142-
profiles: [s3, oracle, clickhouse, mysql, mssql, hdfs, hive, all]
142+
profiles: [s3, oracle, clickhouse, mysql, mssql, hdfs, hive, sftp, all]
143143

144144
test-s3:
145145
image: bitnami/minio:latest
@@ -225,7 +225,7 @@ services:
225225
interval: 30s
226226
timeout: 5s
227227
retries: 3
228-
profiles: [hive, hdfs, s3, all]
228+
profiles: [hive, hdfs, s3, sftp, all]
229229

230230
keycloak:
231231
image: quay.io/keycloak/keycloak:latest
@@ -263,8 +263,22 @@ services:
263263
HIVE_METASTORE_DB_DRIVER: org.postgresql.Driver
264264
HIVE_METASTORE_DB_USER: test_hive
265265
HIVE_METASTORE_DB_PASSWORD: test_hive
266-
# writing spark dataframe to s3 xml file fails without running hive metastore server
267-
profiles: [hive, hdfs, s3, all]
266+
# writing a spark dataframe to an s3/sftp xml file fails without a running hive metastore server
267+
profiles: [hive, hdfs, s3, sftp, all]
268+
269+
test-sftp:
270+
image: ${SFTP_IMAGE:-linuxserver/openssh-server}
271+
restart: unless-stopped
272+
ports:
273+
- 2222:2222
274+
environment:
275+
PUID: 1000
276+
PGID: 1000
277+
USER_NAME: syncmaster
278+
PASSWORD_ACCESS: true
279+
SUDO_ACCESS: true
280+
USER_PASSWORD: test_only
281+
profiles: [sftp, all]
268282

269283
volumes:
270284
postgres_test_data:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add logic for handling SFTP transfers

0 commit comments

Comments
 (0)