Skip to content

Commit d1c0665

Browse files
authored
Added hashing function support (microsoft#152)
* Got hashing test to run to completion * Got unit test to pass * Made hashing functions only work on SQL Server 2019 and above * Fixed merge conflicts * Removed accidentally added unit test * Retrieved column_name more simply
1 parent c24dd7b commit d1c0665

File tree

2 files changed

+115
-13
lines changed

2 files changed

+115
-13
lines changed

mssql/functions.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from django.db.models import BooleanField, CheckConstraint, Value
1010
from django.db.models.expressions import Case, Exists, Expression, OrderBy, When, Window
1111
from django.db.models.fields import BinaryField, Field
12-
from django.db.models.functions import Cast, NthValue
12+
from django.db.models.functions import Cast, NthValue, MD5, SHA1, SHA224, SHA256, SHA384, SHA512
1313
from django.db.models.functions.math import ATan2, Ln, Log, Mod, Round
1414
from django.db.models.lookups import In, Lookup
1515
from django.db.models.query import QuerySet
@@ -288,6 +288,98 @@ def bulk_update_with_default(self, objs, fields, batch_size=None, default=0):
288288
return rows_updated
289289

290290

291+
def sqlserver_md5(self, compiler, connection, **extra_context):
    """Compile Django's ``MD5`` database function for SQL Server.

    Installed as ``MD5.as_microsoft``. The emitted SQL hashes the compiled
    source expression with ``HASHBYTES`` and returns the digest as a
    lower-case 32-character string.

    Args:
        compiler: The SQL compiler handling the current query.
        connection: The database connection wrapper; its
            ``sql_server_version`` is checked before any SQL is produced.
        **extra_context: Extra template context forwarded to ``as_sql``.

    Returns:
        Whatever ``self.as_sql`` returns for the SQL Server template.

    Raises:
        NotSupportedError: If the server is older than SQL Server 2019.
    """
    # UTF-8 support added in SQL Server 2019
    if connection.sql_server_version < 2019:
        raise NotSupportedError("Hashing is not supported on this version SQL Server. Upgrade to 2019 or above")

    # Collation of SQL Server by default is UTF-16 but Django always assumes UTF-8 encoding
    # https://docs.djangoproject.com/en/4.0/ref/unicode/#general-string-handling
    #
    # The value is cast to VARCHAR(MAX) instead of a length measured with a
    # separate MAX(DATALENGTH(...)) query: that length is NULL for an empty
    # table, counts UTF-16 bytes (so text that is longer in UTF-8 would be
    # truncated before hashing), and requires an extra round trip on every
    # compile. %(expressions)s lets Django substitute the properly quoted and
    # qualified source expression instead of a bare field name.
    return self.as_sql(
        compiler,
        connection,
        template=(
            "LOWER(CONVERT(CHAR(32), HASHBYTES('%(function)s', "
            "CAST(%(expressions)s COLLATE Latin1_General_100_CI_AI_SC_UTF8 AS VARCHAR(MAX))), 2))"
        ),
        **extra_context,
    )
310+
311+
312+
def sqlserver_sha1(self, compiler, connection, **extra_context):
    """Compile Django's ``SHA1`` database function for SQL Server.

    Installed as ``SHA1.as_microsoft``. The emitted SQL hashes the compiled
    source expression with ``HASHBYTES`` and returns the digest as a
    lower-case 40-character string.

    Args:
        compiler: The SQL compiler handling the current query.
        connection: The database connection wrapper; its
            ``sql_server_version`` is checked before any SQL is produced.
        **extra_context: Extra template context forwarded to ``as_sql``.

    Returns:
        Whatever ``self.as_sql`` returns for the SQL Server template.

    Raises:
        NotSupportedError: If the server is older than SQL Server 2019.
    """
    # UTF-8 support added in SQL Server 2019
    if connection.sql_server_version < 2019:
        raise NotSupportedError("Hashing is not supported on this version SQL Server. Upgrade to 2019 or above")

    # Collation of SQL Server by default is UTF-16 but Django always assumes UTF-8 encoding
    # https://docs.djangoproject.com/en/4.0/ref/unicode/#general-string-handling
    #
    # The value is cast to VARCHAR(MAX) instead of a length measured with a
    # separate MAX(DATALENGTH(...)) query: that length is NULL for an empty
    # table, counts UTF-16 bytes (so text that is longer in UTF-8 would be
    # truncated before hashing), and requires an extra round trip on every
    # compile. %(expressions)s lets Django substitute the properly quoted and
    # qualified source expression instead of a bare field name.
    return self.as_sql(
        compiler,
        connection,
        template=(
            "LOWER(CONVERT(CHAR(40), HASHBYTES('%(function)s', "
            "CAST(%(expressions)s COLLATE Latin1_General_100_CI_AI_SC_UTF8 AS VARCHAR(MAX))), 2))"
        ),
        **extra_context,
    )
331+
332+
333+
def sqlserver_sha224(self, compiler, connection, **extra_context):
    """Reject Django's ``SHA224`` database function on SQL Server.

    Installed as ``SHA224.as_microsoft``. No SQL is generated; the call
    always fails.

    Raises:
        NotSupportedError: Unconditionally.
    """
    message = "SHA224 is not supported on SQL Server."
    raise NotSupportedError(message)
335+
336+
337+
def sqlserver_sha256(self, compiler, connection, **extra_context):
    """Compile Django's ``SHA256`` database function for SQL Server.

    Installed as ``SHA256.as_microsoft``. The emitted SQL hashes the compiled
    source expression with ``HASHBYTES('SHA2_256', ...)`` and returns the
    digest as a lower-case 64-character string.

    Args:
        compiler: The SQL compiler handling the current query.
        connection: The database connection wrapper; its
            ``sql_server_version`` is checked before any SQL is produced.
        **extra_context: Extra template context forwarded to ``as_sql``.

    Returns:
        Whatever ``self.as_sql`` returns for the SQL Server template.

    Raises:
        NotSupportedError: If the server is older than SQL Server 2019.
    """
    # UTF-8 support added in SQL Server 2019
    if connection.sql_server_version < 2019:
        raise NotSupportedError("Hashing is not supported on this version SQL Server. Upgrade to 2019 or above")

    # Collation of SQL Server by default is UTF-16 but Django always assumes UTF-8 encoding
    # https://docs.djangoproject.com/en/4.0/ref/unicode/#general-string-handling
    #
    # The value is cast to VARCHAR(MAX) instead of a length measured with a
    # separate MAX(DATALENGTH(...)) query: that length is NULL for an empty
    # table, counts UTF-16 bytes (so text that is longer in UTF-8 would be
    # truncated before hashing), and requires an extra round trip on every
    # compile. %(expressions)s lets Django substitute the properly quoted and
    # qualified source expression instead of a bare field name.
    return self.as_sql(
        compiler,
        connection,
        template=(
            "LOWER(CONVERT(CHAR(64), HASHBYTES('SHA2_256', "
            "CAST(%(expressions)s COLLATE Latin1_General_100_CI_AI_SC_UTF8 AS VARCHAR(MAX))), 2))"
        ),
        **extra_context,
    )
356+
357+
358+
def sqlserver_sha384(self, compiler, connection, **extra_context):
    """Reject Django's ``SHA384`` database function on SQL Server.

    Installed as ``SHA384.as_microsoft``. No SQL is generated; the call
    always fails.

    Raises:
        NotSupportedError: Unconditionally.
    """
    message = "SHA384 is not supported on SQL Server."
    raise NotSupportedError(message)
360+
361+
362+
def sqlserver_sha512(self, compiler, connection, **extra_context):
    """Compile Django's ``SHA512`` database function for SQL Server.

    Installed as ``SHA512.as_microsoft``. The emitted SQL hashes the compiled
    source expression with ``HASHBYTES('SHA2_512', ...)`` and returns the
    digest as a lower-case 128-character string.

    Args:
        compiler: The SQL compiler handling the current query.
        connection: The database connection wrapper; its
            ``sql_server_version`` is checked before any SQL is produced.
        **extra_context: Extra template context forwarded to ``as_sql``.

    Returns:
        Whatever ``self.as_sql`` returns for the SQL Server template.

    Raises:
        NotSupportedError: If the server is older than SQL Server 2019.
    """
    # UTF-8 support added in SQL Server 2019
    if connection.sql_server_version < 2019:
        raise NotSupportedError("Hashing is not supported on this version SQL Server. Upgrade to 2019 or above")

    # Collation of SQL Server by default is UTF-16 but Django always assumes UTF-8 encoding
    # https://docs.djangoproject.com/en/4.0/ref/unicode/#general-string-handling
    #
    # The value is cast to VARCHAR(MAX) instead of a length measured with a
    # separate MAX(DATALENGTH(...)) query: that length is NULL for an empty
    # table, counts UTF-16 bytes (so text that is longer in UTF-8 would be
    # truncated before hashing), and requires an extra round trip on every
    # compile. %(expressions)s lets Django substitute the properly quoted and
    # qualified source expression instead of a bare field name.
    return self.as_sql(
        compiler,
        connection,
        template=(
            "LOWER(CONVERT(CHAR(128), HASHBYTES('SHA2_512', "
            "CAST(%(expressions)s COLLATE Latin1_General_100_CI_AI_SC_UTF8 AS VARCHAR(MAX))), 2))"
        ),
        **extra_context,
    )
381+
382+
291383
# `as_microsoft` called by django.db.models.sql.compiler based on connection.vendor
292384
ATan2.as_microsoft = sqlserver_atan2
293385
# Need copy of old In.split_parameter_list_as_sql for other backends to call
@@ -305,6 +397,12 @@ def bulk_update_with_default(self, objs, fields, batch_size=None, default=0):
305397
NthValue.as_microsoft = sqlserver_nth_value
306398
Round.as_microsoft = sqlserver_round
307399
Window.as_microsoft = sqlserver_window
400+
# Hash functions: attach the SQL Server compilers for MD5/SHA*. The SHA224 and
# SHA384 handlers always raise NotSupportedError.
MD5.as_microsoft = sqlserver_md5
401+
SHA1.as_microsoft = sqlserver_sha1
402+
SHA224.as_microsoft = sqlserver_sha224
403+
SHA256.as_microsoft = sqlserver_sha256
404+
SHA384.as_microsoft = sqlserver_sha384
405+
SHA512.as_microsoft = sqlserver_sha512
308406
# Rebind these Django attributes to the replacements named on the right.
BinaryField.__init__ = BinaryField_init
309407
CheckConstraint._get_check_sql = _get_check_sql
310408

testapp/settings.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -175,18 +175,6 @@
175175
'aggregation.tests.AggregateTestCase.test_aggregation_subquery_annotation_exists',
176176
'aggregation.tests.AggregateTestCase.test_aggregation_subquery_annotation_values_collision',
177177
'db_functions.datetime.test_extract_trunc.DateFunctionWithTimeZoneTests.test_extract_func_with_timezone',
178-
'db_functions.text.test_md5.MD5Tests.test_basic',
179-
'db_functions.text.test_md5.MD5Tests.test_transform',
180-
'db_functions.text.test_sha1.SHA1Tests.test_basic',
181-
'db_functions.text.test_sha1.SHA1Tests.test_transform',
182-
'db_functions.text.test_sha224.SHA224Tests.test_basic',
183-
'db_functions.text.test_sha224.SHA224Tests.test_transform',
184-
'db_functions.text.test_sha256.SHA256Tests.test_basic',
185-
'db_functions.text.test_sha256.SHA256Tests.test_transform',
186-
'db_functions.text.test_sha384.SHA384Tests.test_basic',
187-
'db_functions.text.test_sha384.SHA384Tests.test_transform',
188-
'db_functions.text.test_sha512.SHA512Tests.test_basic',
189-
'db_functions.text.test_sha512.SHA512Tests.test_transform',
190178
'expressions.tests.FTimeDeltaTests.test_date_subquery_subtraction',
191179
'expressions.tests.FTimeDeltaTests.test_datetime_subquery_subtraction',
192180
'expressions.tests.FTimeDeltaTests.test_time_subquery_subtraction',
@@ -267,6 +255,22 @@
267255
'migrations.test_operations.OperationTests.test_run_sql_add_missing_semicolon_on_collect_sql',
268256
'migrations.test_operations.OperationTests.test_alter_field_pk_mti_and_fk_to_base',
269257

258+
# Hashing
259+
# UTF-8 support was added in SQL Server 2019
260+
'db_functions.text.test_md5.MD5Tests.test_basic',
261+
'db_functions.text.test_md5.MD5Tests.test_transform',
262+
'db_functions.text.test_sha1.SHA1Tests.test_basic',
263+
'db_functions.text.test_sha1.SHA1Tests.test_transform',
264+
'db_functions.text.test_sha256.SHA256Tests.test_basic',
265+
'db_functions.text.test_sha256.SHA256Tests.test_transform',
266+
'db_functions.text.test_sha512.SHA512Tests.test_basic',
267+
'db_functions.text.test_sha512.SHA512Tests.test_transform',
268+
# SQL Server doesn't support SHA224 or SHA384
269+
'db_functions.text.test_sha224.SHA224Tests.test_basic',
270+
'db_functions.text.test_sha224.SHA224Tests.test_transform',
271+
'db_functions.text.test_sha384.SHA384Tests.test_basic',
272+
'db_functions.text.test_sha384.SHA384Tests.test_transform',
273+
270274
# Timezone
271275
'timezones.tests.NewDatabaseTests.test_cursor_explicit_time_zone',
272276
# Skipped next tests because pyodbc drops timezone https://github.com/mkleehammer/pyodbc/issues/810

0 commit comments

Comments
 (0)