Skip to content

Commit 71949cc

Browse files
authored
Merge pull request #532 from cody-scott/seed-logic-fix
re-added batch logic and included a test
2 parents 9b6a659 + fdeac01 commit 71949cc

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{# Positional bind-parameter placeholder for SQL Server (pyodbc uses `?`). #}
{% macro sqlserver__get_binding_char() %}
    {{ return('?') }}
{% endmacro %}
4+
5+
{# Default maximum number of rows per seed INSERT batch for SQL Server. #}
{% macro sqlserver__get_batch_size() %}
    {{ return(400) }}
{% endmacro %}
8+
9+
{% macro calc_batch_size(num_columns) %}
    {#
        SQL Server allows for a max of 2098 parameters in a single statement.
        Each row consumes `num_columns` parameters, so the batch must be
        shrunk whenever the configured batch size would exceed that limit.
    #}
    {% set configured_batch = get_batch_size() %}
    {% set parameter_limited_batch = (2098 / num_columns) | int %}

    {% if parameter_limited_batch < configured_batch %}
        {{ return(parameter_limited_batch) }}
    {% else %}
        {{ return(configured_batch) }}
    {% endif %}
{% endmacro %}
21+
22+
{#
    Insert seed rows in batches small enough to stay under SQL Server's
    per-statement parameter limit (batch size comes from calc_batch_size).
    Returns the SQL of the first batch so dbt can write a compiled artifact.
#}
{% macro sqlserver__load_csv_rows(model, agate_table) %}
    {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}
    {% set batch_size = calc_batch_size(agate_table.column_names | length) %}
    {% set statements = [] %}

    {{ log("Inserting batches of " ~ batch_size ~ " records") }}

    {% for chunk in agate_table.rows | batch(batch_size) %}
        {# Flatten this chunk's values into a single bindings list for the driver.
           (A redundant outer `set bindings = []` was removed: it was dead,
           immediately shadowed by this per-chunk assignment.) #}
        {% set bindings = [] %}
        {% for row in chunk %}
            {% do bindings.extend(row) %}
        {% endfor %}

        {# One multi-row INSERT with a `?` placeholder per column per row. #}
        {% set sql %}
            insert into {{ this.render() }} ({{ cols_sql }}) values
            {% for row in chunk -%}
                ({%- for column in agate_table.column_names -%}
                    {{ get_binding_char() }}
                    {%- if not loop.last %},{%- endif %}
                {%- endfor -%})
                {%- if not loop.last %},{%- endif %}
            {%- endfor %}
        {% endset %}

        {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}

        {# Only the first batch's SQL is kept for the compiled output. #}
        {% if loop.index0 == 0 %}
            {% do statements.append(sql) %}
        {% endif %}
    {% endfor %}

    {# Return SQL so we can render it out into the compiled files #}
    {{ return(statements[0]) }}
{% endmacro %}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import pytest
2+
from dbt.tests.util import run_dbt
3+
4+
seed_schema_yml = """
5+
version: 2
6+
seeds:
7+
- name: raw_data
8+
"""
9+
10+
11+
class TestLargeSeed:
    """Seed a CSV large enough to force multiple insert batches.

    With 10 columns, the adapter must split the insert into several
    batches to stay under SQL Server's per-statement parameter limit,
    so a ~3000-row seed exercises the batching path repeatedly.
    """

    def build_large_seed_file(self):
        """Return CSV text with `row_count` data rows and `column_count` columns.

        Header is `id` plus column_1..column_{column_count-1}; each data row
        is its row number followed by the column indices.
        """
        row_count = 3000
        column_count = 10

        header = ",".join(["id"] + [f"column_{i}" for i in range(1, column_count)])
        # range(1, row_count + 1) so exactly row_count data rows are emitted
        # (the previous range(1, row_count) produced one row too few).
        rows = [
            ",".join([str(row)] + [str(col) for col in range(1, column_count)])
            for row in range(1, row_count + 1)
        ]
        return "\n".join([header] + rows)

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Project name used by the dbt test-project fixtures.
        return {"name": "generic_tests"}

    @pytest.fixture(scope="class")
    def seeds(self):
        # Seed file name -> file contents for the generated test project.
        return {
            "raw_data.csv": self.build_large_seed_file(),
            "schema.yml": seed_schema_yml,
        }

    def test_large_seed(self, project):
        # Should complete without exceeding the driver's parameter limit.
        run_dbt(["seed"])

0 commit comments

Comments
 (0)