
Commit ec36a7b

Merge pull request #211 from rlshuhart/patch-1

Fix #210 - Incorrect variable for calc_batch_size

The old sqlserver__load_csv_rows passed cols_sql|length (the character
length of the rendered column-list string, not the number of columns) to
calc_batch_size, so seed batch sizes were computed from the wrong value.
calc_batch_size now takes only the column count and looks up the
configured maximum batch size itself.

2 parents a26bf91 + e7a16ac

File tree

2 files changed: +60 -26 lines

Lines changed: 16 additions & 25 deletions
@@ -1,18 +1,3 @@
-{% macro calc_batch_size(num_columns,max_batch_size) %}
-    {#
-        SQL Server allows for a max of 2100 parameters in a single statement.
-        Check if the max_batch_size fits with the number of columns, otherwise
-        reduce the batch size so it fits.
-    #}
-    {% if num_columns * max_batch_size < 2100 %}
-        {% set batch_size = max_batch_size %}
-    {% else %}
-        {% set batch_size = (2100 / num_columns)|int %}
-    {% endif %}
-
-    {{ return(batch_size) }}
-{% endmacro %}
-
 {% macro sqlserver__get_binding_char() %}
     {{ return('?') }}
 {% endmacro %}
@@ -21,13 +6,27 @@
     {{ return(400) }}
 {% endmacro %}
 
-{% macro basic_load_csv_rows(model, batch_size, agate_table) %}
+{% macro calc_batch_size(num_columns) %}
+    {#
+        SQL Server allows for a max of 2100 parameters in a single statement.
+        Check if the max_batch_size fits with the number of columns, otherwise
+        reduce the batch size so it fits.
+    #}
+    {% set max_batch_size = get_batch_size() %}
+    {% set calculated_batch = (2100 / num_columns)|int %}
+    {% set batch_size = [max_batch_size, calculated_batch] | min %}
+
+    {{ return(batch_size) }}
+{% endmacro %}
 
+{% macro sqlserver__load_csv_rows(model, agate_table) %}
     {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}
+    {% set batch_size = calc_batch_size(agate_table.column_names|length) %}
     {% set bindings = [] %}
-
     {% set statements = [] %}
 
+    {{ log("Inserting batches of " ~ batch_size ~ " records") }}
+
     {% for chunk in agate_table.rows | batch(batch_size) %}
         {% set bindings = [] %}
 
@@ -56,11 +55,3 @@
     {# Return SQL so we can render it out into the compiled files #}
     {{ return(statements[0]) }}
 {% endmacro %}
-
-{% macro sqlserver__load_csv_rows(model, agate_table) %}
-    {% set max_batch_size = get_batch_size() %}
-    {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}
-    {% set batch_size = calc_batch_size(cols_sql|length, max_batch_size) %}
-
-    {{ return(basic_load_csv_rows(model, batch_size, agate_table) )}}
-{% endmacro %}
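
The fix is easier to see outside of Jinja. Below is a minimal Python sketch of
the arithmetic, assuming the default maximum batch size of 400 returned by
get_batch_size above; the rendered column list shown is illustrative, since the
exact quoting comes from get_seed_column_quoted_csv:

# Mirror of the new calc_batch_size macro: SQL Server allows at most 2100
# bound parameters per statement, and each row binds one parameter per
# column, so the batch size is capped at 2100 // num_columns.
def calc_batch_size(num_columns: int, max_batch_size: int = 400) -> int:
    return min(max_batch_size, 2100 // num_columns)

# Bug #210: the old caller passed cols_sql|length, i.e. the *character*
# length of the rendered column-list string, instead of the column count.
cols_sql = '"seed_id", "first_name", "email", "ip_address", "birthday"'  # illustrative
print(calc_batch_size(len(cols_sql)))  # 36  (2100 // 58: far smaller than intended)
print(calc_batch_size(5))              # 400 (2100 // 5 = 420, capped at the 400 default)
print(calc_batch_size(6))              # 350 (2100 // 6 = 350, below the 400 default)

Because the character length is always at least the column count, the old code
never exceeded the 2100-parameter limit; it just inserted seeds in needlessly
small batches.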

tests/functional/adapter/test_seed.py

Lines changed: 44 additions & 1 deletion
@@ -1,5 +1,8 @@
+import os
+
 import pytest
 from dbt.tests.adapter.simple_seed.seeds import seeds__expected_sql
+from dbt.tests.adapter.simple_seed.test_seed import SeedConfigBase
 from dbt.tests.adapter.simple_seed.test_seed import TestBasicSeedTests as BaseBasicSeedTests
 from dbt.tests.adapter.simple_seed.test_seed import (
     TestSeedConfigFullRefreshOff as BaseSeedConfigFullRefreshOff,
@@ -20,7 +23,7 @@
     seeds__disabled_in_config_csv,
     seeds__enabled_in_config_csv,
 )
-from dbt.tests.util import get_connection
+from dbt.tests.util import get_connection, run_dbt
 
 from dbt.adapters.sqlserver import SQLServerAdapter
 
@@ -180,3 +183,43 @@ def setUp(self, project):
 
 class TestSeedSpecificFormatsSQLServer(BaseSeedSpecificFormats):
     pass
+
+
+class TestSeedBatchSizeMaxSQLServer(SeedConfigBase):
+    @pytest.fixture(scope="class")
+    def seeds(self, test_data_dir):
+        return {
+            "five_columns.csv": """
+seed_id,first_name,email,ip_address,birthday
+1,Larry,[email protected],69.135.206.194,2008-09-12 19:08:31
+2,Larry,[email protected],64.210.133.162,1978-05-09 04:15:14
+3,Anna,[email protected],168.104.64.114,2011-10-16 04:07:57
+"""
+        }
+
+    def test_max_batch_size(self, project, logs_dir):
+        run_dbt(["seed"])
+        with open(os.path.join(logs_dir, "dbt.log"), "r") as fp:
+            logs = "".join(fp.readlines())
+
+        assert "Inserting batches of 400 records" in logs
+
+
+class TestSeedBatchSizeCustomSQLServer(SeedConfigBase):
+    @pytest.fixture(scope="class")
+    def seeds(self, test_data_dir):
+        return {
+            "six_columns.csv": """
+seed_id,first_name,last_name,email,ip_address,birthday
+1,Larry,King,[email protected],69.135.206.194,2008-09-12 19:08:31
+2,Larry,Perkins,[email protected],64.210.133.162,1978-05-09 04:15:14
+3,Anna,Montgomery,[email protected],168.104.64.114,2011-10-16 04:07:57
+"""
+        }
+
+    def test_custom_batch_size(self, project, logs_dir):
+        run_dbt(["seed"])
+        with open(os.path.join(logs_dir, "dbt.log"), "r") as fp:
+            logs = "".join(fp.readlines())
+
+        assert "Inserting batches of 350 records" in logs
