Skip to content

Commit 694a71a

Browse files
committed
Merge branch 'develop'
2 parents c4d2caa + bf0796e commit 694a71a

File tree

7 files changed

+15
-5
lines changed

7 files changed

+15
-5
lines changed

docs/changelog/0.12.3.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
Bug Fixes
55
---------
66

7-
- Allow passing table names in format ``schema."table.with.dots"`` to ``DBReader(name=...)`` and ``DBWriter(name=...)``.
7+
- Allow passing table names in format ``schema."table.with.dots"`` to ``DBReader(source=...)`` and ``DBWriter(target=...)``.

docs/changelog/0.12.4.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
0.12.4 (2024-11-27)
2+
===================
3+
4+
Bug Fixes
5+
---------
6+
7+
- Fix ``DBReader(conn=oracle, options={"partitioning_mode": "hash"})`` leading to data skew in the last partition due to wrong ``ora_hash`` usage. (:github:pull:`319`)

docs/changelog/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
:caption: Changelog
44

55
DRAFT
6+
0.12.4
67
0.12.3
78
0.12.2
89
0.12.1

onetl/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.12.3
1+
0.12.4

onetl/connection/db_connection/clickhouse/dialect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class ClickhouseDialect(JDBCDialect):
1212
def get_partition_column_hash(self, partition_column: str, num_partitions: int) -> str:
13-
return f"modulo(halfMD5({partition_column}), {num_partitions})"
13+
return f"halfMD5({partition_column}) % {num_partitions}"
1414

1515
def get_partition_column_mod(self, partition_column: str, num_partitions: int) -> str:
1616
return f"{partition_column} % {num_partitions}"

onetl/connection/db_connection/mssql/dialect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
class MSSQLDialect(JDBCDialect):
1111
# https://docs.microsoft.com/ru-ru/sql/t-sql/functions/hashbytes-transact-sql?view=sql-server-ver16
1212
def get_partition_column_hash(self, partition_column: str, num_partitions: int) -> str:
13-
return f"CONVERT(BIGINT, HASHBYTES ( 'SHA' , {partition_column} )) % {num_partitions}"
13+
return f"CONVERT(BIGINT, HASHBYTES ('SHA', {partition_column})) % {num_partitions}"
1414

1515
def get_partition_column_mod(self, partition_column: str, num_partitions: int) -> str:
1616
return f"{partition_column} % {num_partitions}"

onetl/connection/db_connection/oracle/dialect.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ def get_sql_query(
4343
)
4444

4545
def get_partition_column_hash(self, partition_column: str, num_partitions: int) -> str:
46-
return f"ora_hash({partition_column}, {num_partitions})"
46+
# ora_hash(expr, N) returns values from 0 to N inclusive, i.e. N+1 distinct buckets.
47+
# Mapping N+1 hash buckets onto N partitions leads to data skew in the last partition.
48+
return f"ora_hash({partition_column}, {num_partitions - 1})"
4749

4850
def get_partition_column_mod(self, partition_column: str, num_partitions: int) -> str:
4951
return f"MOD({partition_column}, {num_partitions})"

0 commit comments

Comments
 (0)