-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembulk-sample.yml.liquid
More file actions
54 lines (53 loc) · 1.99 KB
/
embulk-sample.yml.liquid
File metadata and controls
54 lines (53 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# This configuration connects to SQL Server using Native ODBC Driver 17,
# extracts data from a specified table, and outputs it in snappy-compressed Parquet format to both Wasabi and AWS S3.
# Connection details and credentials are managed via environment variables.
# TIMESTAMP columns are read as strings formatted as '%Y/%m/%d %H:%M:%S %:z' in the +0900 timezone; BIGINT columns are also read as strings.
# Make sure the ODBC driver is properly installed and registered on your system.
# This setup is a typical ETL pattern for efficient data export and cloud storage redundancy.
in:
  # Input side: read one table from SQL Server.
  type: sqlserver
  # Use ODBC Native Driver 17 for SQL Server.
  # NOTE(review): confirm that the sqlserver *input* plugin actually reads
  # `native_driver`; if it does not, this key is ignored and the connection
  # is made through the plugin's default driver.
  native_driver: "ODBC Driver 17 for SQL Server"
  # Leftover example for a PostgreSQL source (not used by this SQL Server
  # config): driver path taken from the environment.
  # driver_path: '{{ env.POSTGRESQL_JDBC_DRIVER_PATH }}'
  #
  # Connection details come from environment variables. Each substitution is
  # single-quoted so the rendered text is always parsed as a YAML string, even
  # when the value is empty, consists only of digits, or contains characters
  # such as `#`, `: ` or a leading `*`/`&`/`!`/`@`.
  # A value that itself contains a single quote must have it doubled ('').
  host: '{{ env.DB_HOST }}'
  user: '{{ env.DB_USER }}'
  password: '{{ env.DB_PASS }}'
  database: '{{ env.DB_NAME }}'
  # Number of rows requested from the database at a time.
  fetch_rows: 100000
  schema: dbo
  table: '{{ env.TABLE_NAME }}'
  # Defaults applied per SQL column type.
  default_column_options:
    # Emit timestamps as strings rendered in the +0900 timezone.
    TIMESTAMP:
      type: string
      timestamp_format: '%Y/%m/%d %H:%M:%S %:z'
      timezone: '+0900'
    # Emit 64-bit integers as strings.
    BIGINT:
      type: string
out:
  # Use output_multi plugin to output to both Wasabi and AWS.
  type: multi
  # Every templated value below is single-quoted so the rendered text is always
  # parsed as a YAML string, even when it is empty, all digits, or contains
  # YAML-significant characters. A value containing a single quote must have
  # it doubled ('').
  outputs:
    # WASABI
    - type: s3_parquet
      endpoint: '{{ env.WASABI_ENDPOINT }}'
      region: '{{ env.WASABI_REGION }}'
      bucket: '{{ env.WASABI_BUCKET }}'
      path_prefix: '{{ env.WASABI_PATH_PREFIX }}'
      file_ext: snappy.parquet
      compression_codec: snappy
      default_timezone: Asia/Tokyo
      auth_method: basic
      access_key_id: '{{ env.WASABI_ACCESS_KEY }}'
      secret_access_key: '{{ env.WASABI_SECRET }}'
    # AWS S3
    - type: s3_parquet
      endpoint: '{{ env.AWS_ENDPOINT }}'
      region: '{{ env.AWS_REGION }}'
      bucket: '{{ env.AWS_BUCKET }}'
      path_prefix: '{{ env.AWS_PATH_PREFIX }}'
      file_ext: snappy.parquet
      compression_codec: snappy
      default_timezone: Asia/Tokyo
      auth_method: basic
      access_key_id: '{{ env.AWS_ACCESS_KEY }}'
      secret_access_key: '{{ env.AWS_SECRET }}'
      # NOTE(review): this mapping appears only once, after the AWS entry's
      # keys, so it is kept on the AWS output. If both targets are meant to
      # produce identical Parquet files, copy it to the Wasabi output as well.
      # Also confirm that s3_parquet honours `default_column_options`; the
      # plugin may expect per-type settings under a different key.
      default_column_options:
        timestamp:
          converted_type: timestamp-millis
# Executor settings: request a minimum of one output task.
exec:
  min_output_tasks: 1