Skip to content

Commit 91d9c13

Browse files
Moj 574 speedup hr uploads processing (localytics#23)
* MOJ-574 Updates to the ODBC adapter for a new merge_all functionality. Similar functionality to active record's insert/upsert all, but uses the SQL merge syntax instead of insert with update capabilities. * MOJ-574 Fixes to the rails 7 changes and added the ability to prune duplicate records in merge_all * MOJ-574 Removed extra debug statement * MOJ-574 Optimization to reduce memory and/or cpu usage. Large merges are killing sidekiq. * MOJ-574 Cleaning up the delete code, but ultimately leaving it intentionally disabled and inaccessible. It'll get worked on/tested if/when it's needed. * MOJ-574 I forgot to remove the delete_key code from the persistence portion of the adapter... * MOJ-574 And forgot to correct delete_keys in updatable and insertable columns methods
1 parent dbe56e2 commit 91d9c13

File tree

6 files changed

+251
-15
lines changed

6 files changed

+251
-15
lines changed

lib/active_record/connection_adapters/odbc_adapter.rb

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -155,24 +155,41 @@ def next_sequence_value(table_name = nil)
155155
exec_query("SELECT #{table_name}.NEXTVAL as new_id").first["new_id"]
156156
end
157157

158+
# Assembles the text of a MERGE statement from a builder object.
#
# +merge+ must respond to +into+, +values_list+, +match+, +merge_delete+,
# +merge_update+ and +merge_insert+; each result is interpolated verbatim.
# The statement header goes on the first line and every clause on its own
# line; a clause that renders as an empty string leaves a blank line.
# The returned string always ends with a newline.
def build_merge_sql(merge) # :nodoc:
  header = "MERGE #{merge.into} AS TARGET USING (#{merge.values_list}) AS SOURCE ON #{merge.match}"
  clauses = [merge.merge_delete, merge.merge_update, merge.merge_insert]
  "#{[header, *clauses].join("\n")}\n"
end
166+
167+
# Runs an already-built MERGE statement by handing it to exec_query.
#
# sql  - the statement text to execute.
# name - the label passed through to exec_query as its second argument.
#
# Returns whatever exec_query returns.
def exec_merge_all(sql, name) # :nodoc:
168+
exec_query(sql, name)
169+
end
170+
158171
protected
159172

160173
#Snowflake ODBC Adapter specific
161174
# Registers the adapter's cast types on +map+.
#
# This hunk shows both versions: the lines marked `-` register types under
# symbol keys (:boolean, :date, ...), while the lines marked `+` register
# them under case-insensitive regexes matched against the SQL type name.
# In the `+` version a decimal type whose extracted scale is 0 maps to
# SnowflakeInteger; any other scale maps to Type::Decimal built from the
# extracted precision and scale.
#
# NOTE(review): a name such as "timestamp" matches both %r(time)i and
# %r(timestamp)i; which registration wins depends on the type map's lookup
# order, which is not visible here -- confirm before reordering these lines.
def initialize_type_map(map)
162-
map.register_type :boolean, Type::Boolean.new
163-
map.register_type :date, Type::Date.new
164-
map.register_type :string, Type::String.new
165-
map.register_type :datetime, Type::DateTime.new
166-
map.register_type :time, Type::Time.new
167-
map.register_type :binary, Type::Binary.new
168-
map.register_type :float, Type::Float.new
169-
map.register_type :integer, ::ODBCAdapter::Type::SnowflakeInteger.new
170-
map.register_type(:decimal) do |_sql_type, column_data|
171-
Type::Decimal.new(precision: column_data.precision, scale: column_data.scale)
175+
map.register_type %r(boolean)i, Type::Boolean.new
176+
map.register_type %r(date)i, Type::Date.new
177+
map.register_type %r(varchar)i, Type::String.new
178+
map.register_type %r(time)i, Type::Time.new
179+
map.register_type %r(timestamp)i, Type::DateTime.new
180+
map.register_type %r(binary)i, Type::Binary.new
181+
map.register_type %r(double)i, Type::Float.new
182+
map.register_type(%r(decimal)i) do |sql_type|
183+
scale = extract_scale(sql_type)
184+
if scale == 0
185+
::ODBCAdapter::Type::SnowflakeInteger.new
186+
else
187+
Type::Decimal.new(precision: extract_precision(sql_type), scale: scale)
188+
end
172189
end
173-
map.register_type :object, ::ODBCAdapter::Type::SnowflakeObject.new
174-
map.register_type :array, ::ODBCAdapter::Type::ArrayOfValues.new
175-
map.register_type :variant, ::ODBCAdapter::Type::Variant.new
190+
map.register_type %r(struct)i, ::ODBCAdapter::Type::SnowflakeObject.new
191+
map.register_type %r(array)i, ::ODBCAdapter::Type::ArrayOfValues.new
192+
map.register_type %r(variant)i, ::ODBCAdapter::Type::Variant.new
176193
end
177194

178195
# Translate an exception from the native DBMS to something usable by

lib/active_record/merge_all.rb

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
# frozen_string_literal: true
2+
3+
require "active_support/core_ext/enumerable"
4+
5+
module ActiveRecord
  # Builds and executes one SQL MERGE statement from a list of attribute
  # hashes ("merges"). Rows are matched on the table's primary key(s);
  # matched rows can be updated and unmatched rows inserted.
  #
  # The column set is taken from the first hash (plus any scope attributes of
  # the model); every other hash must carry exactly the same keys.
  class MergeAll # :nodoc:
    attr_reader :model, :connection, :merges, :keys
    attr_reader :perform_inserts, :perform_updates, :delete_key

    # model            - the model class whose table is merged into.
    # merges           - non-empty list of attribute hashes (string or symbol keys).
    # perform_inserts  - emit the WHEN NOT MATCHED ... INSERT clauses.
    # perform_updates  - emit the WHEN MATCHED ... UPDATE clause.
    # prune_duplicates - keep only the last row for each primary key value.
    #
    # Raises ArgumentError when +merges+ is blank, or when pruning is requested
    # and not every primary key column is among the keys.
    def initialize(model, merges, perform_inserts: true, perform_updates: true, prune_duplicates: false)
      raise ArgumentError, "Empty list of attributes passed" if merges.blank?

      # TODO: Implement perform_deletes. Most of the code is here, but all completely untested.
      @model, @connection, @merges, @keys = model, model.connection, merges, merges.first.keys.map(&:to_s)
      @perform_inserts, @perform_updates, @delete_key = perform_inserts, perform_updates, nil

      if model.scope_attributes?
        @scope_attributes = model.scope_attributes
        @keys |= @scope_attributes.keys
      end
      @keys = @keys.to_set

      ensure_valid_options_for_connection!

      do_prune_duplicates if prune_duplicates
    end

    # Builds the MERGE statement and runs it through the connection's
    # exec_merge_all, labelled "<Model> Merge" or "<Model> Bulk Merge".
    def execute
      message = +"#{model} "
      message << "Bulk " if merges.many?
      message << "Merge"
      connection.exec_merge_all to_sql, message
    end

    # Keys that may appear in the UPDATE SET list: every key except primary
    # keys, the model's read-only attributes and the delete key.
    def updatable_columns
      keys - readonly_columns - [delete_key]
    end

    # Keys that may appear in an INSERT column list: every key except the
    # delete key.
    def insertable_columns
      keys - [delete_key]
    end

    # Insertable keys with the primary key columns removed.
    def insertable_non_primary_columns
      insertable_columns - primary_keys
    end

    # Primary key column name(s) of the model's table, always as an Array.
    def primary_keys
      Array(connection.schema_cache.primary_keys(model.table_name))
    end

    # Yields (key, value) for every key of every row, in +keys+ order, and
    # returns one array of block results per row.
    #
    # Scope attributes, when present, overwrite the row's own values.
    # Raises ArgumentError when a row's keys differ from +keys+.
    def map_key_with_value
      merges.map do |attributes|
        # stringify_keys returns a copy, so merge! below does not touch the caller's hash.
        attributes = attributes.stringify_keys
        attributes.merge!(scope_attributes) if scope_attributes

        verify_attributes(attributes)

        keys.map do |key|
          yield key, attributes[key]
        end
      end
    end

    # True when a delete key is configured. It is always nil at present, so
    # the DELETE clause is never generated.
    def perform_deletes
      !delete_key.nil?
    end

    private
      attr_reader :scope_attributes

      # Placeholder for adapter capability checks; intentionally does nothing.
      def ensure_valid_options_for_connection!
      end

      # Drops all but the last row for each primary key value, preserving the
      # relative order of the surviving rows. Works on a copy, so the array
      # passed in by the caller is not mutated.
      #
      # Raises ArgumentError unless every primary key column is among +keys+.
      def do_prune_duplicates
        unless primary_keys.to_set.subset?(keys)
          raise ArgumentError, "Pruning duplicates requires presense of all primary keys in the merges"
        end

        # Reverse first so that uniq! (which keeps the first occurrence) keeps the LAST row.
        pruned = merges.reverse
        pruned.uniq! do |merge|
          # Primary key names are strings, but rows may be keyed by symbols
          # (the constructor and map_key_with_value both stringify). Looking
          # up the raw hash would yield nil for every row and collapse the
          # whole list to a single entry.
          attributes = merge.stringify_keys
          primary_keys.map { |key| attributes[key] }
        end
        pruned.reverse!
        @merges = pruned
      end

      # Renders the full MERGE statement via the connection.
      def to_sql
        connection.build_merge_sql(ActiveRecord::MergeAll::Builder.new(self))
      end

      # Columns that must never be updated: primary keys plus the model's
      # read-only attributes.
      def readonly_columns
        primary_keys + model.readonly_attributes.to_a
      end

      # Raises ArgumentError unless +attributes+ has exactly the expected keys.
      def verify_attributes(attributes)
        if keys != attributes.keys.to_set
          raise ArgumentError, "All objects being merged must have the same keys"
        end
      end

      # Renders the individual fragments of the MERGE statement for a
      # MergeAll instance. The connection's build_merge_sql stitches the
      # fragments together.
      class Builder # :nodoc:
        attr_reader :model

        delegate :keys, to: :merge_all

        def initialize(merge_all)
          @merge_all, @model, @connection = merge_all, merge_all.model, merge_all.connection
        end

        # Target of the MERGE: "INTO <quoted table name>".
        def into
          # "INTO #{model.quoted_table_name} (#{columns_list})"
          "INTO #{model.quoted_table_name}"
        end

        # Source of the MERGE: a SELECT over a VALUES list holding every row,
        # aliased with the quoted column names. Each value is serialized with
        # the model's attribute type for its column.
        #
        # Raises UnknownAttributeError when a key is not a column of the table.
        def values_list
          types = extract_types_from_columns_on(model.table_name, keys: keys)

          values_list = merge_all.map_key_with_value do |key, value|
            connection.with_yaml_fallback(types[key].serialize(value))
          end

          values = connection.visitor.compile(Arel::Nodes::ValuesList.new(values_list))

          "SELECT * FROM (#{values}) AS v1 (#{columns_list})"
        end

        # ON condition: equality of every primary key column between SOURCE
        # and TARGET, joined with AND.
        def match
          quote_columns(merge_all.primary_keys).map { |column| "SOURCE.#{column}=TARGET.#{column}" }.join(" AND ")
        end

        # DELETE clause, or "" when deletes are not enabled.
        def merge_delete
          merge_all.perform_deletes ? "WHEN MATCHED AND SOURCE.#{quote_column(merge_all.delete_key)} = TRUE THEN DELETE" : ""
        end

        # UPDATE clause, or "" when updates are disabled or there is nothing
        # to update.
        def merge_update
          return "" unless merge_all.perform_updates

          assignments = updatable_columns.map { |column| "TARGET.#{column}=SOURCE.#{column}" }
          # When every supplied column is a primary key or read-only there is
          # nothing to assign; "UPDATE SET" with an empty list is not valid
          # SQL, so the clause is omitted instead.
          return "" if assignments.empty?

          "WHEN MATCHED THEN UPDATE SET #{assignments.join(",")}"
        end

        # INSERT clauses, or "" when inserts are disabled. Two clauses are
        # produced:
        #   * all primary keys present in SOURCE -> insert every insertable column;
        #   * any primary key NULL in SOURCE     -> insert without the primary key columns
        #     (presumably so the database supplies the key -- confirm).
        def merge_insert
          if merge_all.perform_inserts
            <<~SQL
              WHEN NOT MATCHED AND #{quote_columns(merge_all.primary_keys).map { |column| "SOURCE.#{column} IS NOT NULL" }.join(" AND ")} THEN INSERT (#{insertable_columns_list}) VALUES (#{quote_columns(merge_all.insertable_columns).map { |column| "SOURCE.#{column}"}.join(",")})
              WHEN NOT MATCHED AND #{quote_columns(merge_all.primary_keys).map { |column| "SOURCE.#{column} IS NULL" }.join(" OR ")} THEN INSERT (#{insertable_non_primary_columns_list}) VALUES (#{quote_columns(merge_all.insertable_non_primary_columns).map { |column| "SOURCE.#{column}"}.join(",")})
            SQL
          else
            ""
          end
        end

        private
          attr_reader :connection, :merge_all

          # Comma-separated, quoted list of every key.
          def columns_list
            format_columns(merge_all.keys)
          end

          # Comma-separated, quoted list of the insertable columns.
          def insertable_columns_list
            format_columns(merge_all.insertable_columns)
          end

          # Comma-separated, quoted list of the insertable non-primary columns.
          def insertable_non_primary_columns_list
            format_columns(merge_all.insertable_non_primary_columns)
          end

          # Quoted names of the updatable columns.
          def updatable_columns
            quote_columns(merge_all.updatable_columns)
          end

          # Maps each key to the model's attribute type for it.
          # Raises UnknownAttributeError for the first key that is not a
          # column of +table_name+.
          def extract_types_from_columns_on(table_name, keys:)
            columns = connection.schema_cache.columns_hash(table_name)

            unknown_column = (keys - columns.keys).first
            raise UnknownAttributeError.new(model.new, unknown_column) if unknown_column

            keys.index_with { |key| model.type_for_attribute(key) }
          end

          # Joins a collection of column names into a quoted, comma-separated
          # list; anything that does not respond to +map+ is returned as is.
          def format_columns(columns)
            columns.respond_to?(:map) ? quote_columns(columns).join(",") : columns
          end

          def quote_columns(columns)
            columns.map(&method(:quote_column))
          end

          def quote_column(column)
            connection.quote_column_name(column)
          end
      end
  end
end
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
require 'active_record/merge_all'
2+
3+
module ActiveRecord
  # Concern that adds a bulk MERGE entry point to the including class.
  module MergeAllPersistence
    extend ActiveSupport::Concern

    module ClassMethods
      # Merges +attributes+ (a list of attribute hashes) into this class's
      # table by building a MergeAll for +self+ and executing it.
      #
      # perform_inserts  - forwarded to MergeAll.new.
      # perform_updates  - forwarded to MergeAll.new.
      # prune_duplicates - forwarded to MergeAll.new.
      #
      # Returns the result of MergeAll#execute.
      def merge_all!(attributes, perform_inserts: true, perform_updates: true, prune_duplicates: false)
        options = {
          perform_inserts: perform_inserts,
          perform_updates: perform_updates,
          prune_duplicates: prune_duplicates
        }
        MergeAll.new(self, attributes, **options).execute
      end
    end
  end
end

lib/odbc_adapter.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
# Requiring with this pattern to mirror ActiveRecord
22
require 'active_record/connection_adapters/odbc_adapter'
3+
require 'active_record/merge_all_persistence'

lib/odbc_adapter/quoting.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def quoted_date(value)
4040
end
4141

4242
# Looks up the cast type for +column+ in the adapter's type map.
#
# This hunk changes the lookup key: the line marked `-` keys on column.type,
# the line marked `+` keys on column.sql_type.
def lookup_cast_type_from_column(column) # :nodoc:
43-
type_map.lookup(column.type)
43+
type_map.lookup(column.sql_type)
4444
end
4545

4646
def quote_hash(hash:)

lib/odbc_adapter/schema_statements.rb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def columns(table_name, _name = nil)
8383
col_nullable = nullability(col_name, col[17], col[10])
8484

8585
# This section has been customized for Snowflake and will not work in general.
86-
args = { sql_type: col_native_type, type: col_native_type, limit: col_limit }
86+
args = { sql_type: construct_sql_type(col_native_type, col_limit, col_scale), type: col_native_type, limit: col_limit }
8787
args[:type] = case col_native_type
8888
when "BOOLEAN" then :boolean
8989
when "VARIANT" then :variant
@@ -177,5 +177,17 @@ def name_regex(name)
177177
/^#{name}$/i
178178
end
179179
end
180+
181+
# Changes in rails 7 mean that we need all of the type information in the sql_type column
182+
# This reconstructs sql types using limit (which is precision) and scale
183+
# Rebuilds a full SQL type string from its parts.
#
# native_type - the bare type name, e.g. "NUMBER".
# limit       - the precision/length; may be nil.
# scale       - the number of fractional digits; may be nil.
#
# Returns "TYPE(limit,scale)" when scale is positive, "TYPE(limit)" when only
# limit is positive, and the bare type name otherwise. A nil limit or scale
# is treated as "not positive" instead of raising NoMethodError.
def construct_sql_type(native_type, limit, scale)
  if scale&.positive?
    "#{native_type}(#{limit},#{scale})"
  elsif limit&.positive?
    "#{native_type}(#{limit})"
  else
    native_type
  end
end
180192
end
181193
end

0 commit comments

Comments
 (0)