-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmeltano.yml
More file actions
95 lines (87 loc) · 3.07 KB
/
meltano.yml
File metadata and controls
95 lines (87 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Meltano project definition used to develop and test the tap-spreadsheets
# extractor.
version: 1
send_anonymous_usage_stats: true
project_id: "tap-spreadsheets"

# Environment selected when none is given explicitly.
default_environment: test

# Backend Meltano uses to create plugin virtual environments.
venv:
  backend: uv

environments:
  - name: test
# Plugins of this project: the tap under development and a JSONL loader.
plugins:
  extractors:
    - name: tap-spreadsheets
      namespace: tap_spreadsheets
      # Editable install of the tap from the current directory.
      pip_url: -e .
      capabilities:
        - state
        - catalog
        - discover
        - about
        - stream-maps
      settings:
        - name: files
          kind: array
          label: Files
          # Literal block scalar (|) so each bullet below stays on its own
          # line in the rendered description.
          description: |
            List of file configurations. Each entry is an object with keys:
            - path (string): Glob expression (local or S3).
            - format (string): 'excel' or 'csv'.
            - worksheet (string): Worksheet index, name or regular expression
              (Excel only). Using regular expressions, any matching worksheet
              will be processed.
            - table_name (string): Optional stream name (defaults to file name).
            - primary_keys (array): List of PK column names.
            - drop_empty (boolean): Drop rows with empty/null PKs.
            - skip_columns (integer): Number of leading columns to skip.
            - skip_rows (integer): Rows to skip before headers.
            - sample_rows (integer): Rows to sample for schema inference.
            - column_headers (array): Explicit column headers.
            - delimiter (string): CSV delimiter, default ','. Inferred if not provided.
            - quotechar (string): CSV quote char, default '"'. Inferred if not provided.
            - schema_overrides (dict): Override the JSON schema definition per
              field. E.g. { my_column_name: { type: [string, "null"] } }
      # `files` is the one setting group that must be provided.
      settings_group_validation:
        - [files]
      config:
        files:
          # Disabled example entries for local Excel/CSV files under data/.
          # Uncomment an entry to enable it.
          # - path: data/*.xlsx
          #   format: excel
          #   table_name: test
          #   primary_keys: [date, total]
          #   drop_empty: true
          #   worksheet: Sheet1
          #   schema_overrides:
          #     total:
          #       type: [string, "null"]
          # - path: data/*.xlsx
          #   format: excel
          #   table_name: test_skip
          #   primary_keys: [date, total]
          #   drop_empty: true
          #   worksheet: Sheet2
          #   skip_columns: 1
          #   skip_rows: 4
          # - path: data/*.xlsx
          #   format: excel
          #   table_name: test_regex
          #   primary_keys: [date, total]
          #   drop_empty: true
          #   worksheet: "report_[0-9]+"
          # - path: data/*.xlsx
          #   format: excel
          #   worksheet: Sheet2
          #   table_name: excel_table
          #   primary_keys: [date, total]
          #   drop_empty: true
          #   skip_columns: 1
          #   skip_rows: 4
          # - path: data/test.csv
          #   format: csv
          #   table_name: csv
          #   primary_keys: [date, total]
          #   drop_empty: true

          # Active entry: CSV files matching 01*.csv under s3://local-data.
          # Values are quoted so the leading digits and the glob character
          # are always read as plain strings.
          - path: "s3://local-data/01*.csv"
            format: csv
            table_name: "01_normalized"
            primary_keys: [date_start]
            drop_empty: true
  loaders:
    - name: target-jsonl
      variant: andyh1203
      pip_url: target-jsonl