forked from DFE-Digital/dfe-analytics
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalytics.rb
More file actions
302 lines (254 loc) · 10.5 KB
/
analytics.rb
File metadata and controls
302 lines (254 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# frozen_string_literal: true
require 'request_store_rails'
require 'i18n'
require 'httparty'
require 'google/cloud/bigquery'
require 'dfe/analytics/activerecord' if defined?(ActiveRecord)
require 'dfe/analytics/event_schema'
require 'dfe/analytics/fields'
require 'dfe/analytics/entities'
require 'dfe/analytics/shared/service_pattern'
require 'dfe/analytics/event'
require 'dfe/analytics/event_matcher'
require 'dfe/analytics/analytics_job'
require 'dfe/analytics/send_events'
require 'dfe/analytics/load_entities'
require 'dfe/analytics/load_entity_batch'
require 'dfe/analytics/requests'
require 'dfe/analytics/entity_table_check_job'
require 'dfe/analytics/initialisation_events'
require 'dfe/analytics/version'
require 'dfe/analytics/middleware/request_identity'
require 'dfe/analytics/middleware/send_cached_page_request_event'
require 'dfe/analytics/railtie'
require 'dfe/analytics/big_query_api'
require 'dfe/analytics/big_query_legacy_api'
require 'dfe/analytics/azure_federated_auth'
module DfE
module Analytics
# Gem-specific error class (a plain StandardError subclass). Nothing inside
# this module definition raises it.
# NOTE(review): presumably raised by other parts of the gem for invalid
# configuration - confirm against callers.
class ConfigurationError < StandardError; end
# Returns the memoised configuration object.
#
# The object is an instance of an anonymous Struct exposing one accessor per
# supported setting. It is built on first use, every member starts out nil,
# and the same instance is handed back on every later call.
#
# @return [Struct] the shared configuration object
def self.config
  return @config if @config

  settings = %i[
    log_only async queue
    bigquery_table_name bigquery_project_id bigquery_dataset
    bigquery_api_json_key bigquery_retries bigquery_timeout
    enable_analytics environment user_identifier
    entity_table_checks_enabled rack_page_cached bigquery_maintenance_window
    azure_federated_auth azure_client_id azure_token_path azure_scope gcp_scope
    google_cloud_credentials excluded_paths excluded_models_proc
  ]

  @config = Struct.new(*settings).new
end
# Yields the configuration object to the caller's block, then fills in a
# default for every setting the block left unset.
#
# Most defaults are applied with `||=`. `async` is the exception: its default
# is `true`, and `||=` would treat an explicit `false` as unset and silently
# switch asynchronous sending back on, so it is defaulted only when nil.
#
# The Azure/GCP federated-auth settings are defaulted only when
# `azure_federated_auth` is truthy; otherwise the method returns before
# touching them.
#
# @yieldparam config [Struct] the object returned by `config`
# @return [void]
def self.configure
  yield(config)

  config.enable_analytics ||= proc { true }
  config.bigquery_table_name ||= ENV.fetch('BIGQUERY_TABLE_NAME', nil)
  config.bigquery_project_id ||= ENV.fetch('BIGQUERY_PROJECT_ID', nil)
  config.bigquery_dataset ||= ENV.fetch('BIGQUERY_DATASET', nil)
  config.bigquery_api_json_key ||= ENV.fetch('BIGQUERY_API_JSON_KEY', nil)
  config.bigquery_retries ||= 3
  config.bigquery_timeout ||= 120
  config.environment ||= ENV.fetch('RAILS_ENV', 'development')
  config.log_only ||= false
  # Respect an explicit `false`; only default when nothing was set.
  config.async = true if config.async.nil?
  config.queue ||= :default
  config.user_identifier ||= proc { |user| user&.id }
  config.entity_table_checks_enabled ||= false
  config.rack_page_cached ||= proc { |_rack_env| false }
  config.bigquery_maintenance_window ||= ENV.fetch('BIGQUERY_MAINTENANCE_WINDOW', nil)
  config.azure_federated_auth ||= false
  config.excluded_paths ||= []
  config.excluded_models_proc ||= proc { |_model| false }

  return unless config.azure_federated_auth

  config.azure_client_id ||= ENV.fetch('AZURE_CLIENT_ID', nil)
  config.azure_token_path ||= ENV.fetch('AZURE_FEDERATED_TOKEN_FILE', nil)
  config.google_cloud_credentials ||= JSON.parse(ENV.fetch('GOOGLE_CLOUD_CREDENTIALS', '{}')).deep_symbolize_keys
  config.azure_scope ||= DfE::Analytics::AzureFederatedAuth::DEFAULT_AZURE_SCOPE
  config.gcp_scope ||= DfE::Analytics::AzureFederatedAuth::DEFAULT_GCP_SCOPE
end
# Boots the gem inside a Rails application.
#
# Outside production, the initializer file
# config/initializers/dfe_analytics.rb must exist; if it does not, a warning
# is logged and printed and nothing else happens. Otherwise entity tracking
# is set up when ActiveRecord is loaded, or an informational message is
# logged when it is not.
def self.initialize!
  # The production check short-circuits, so the filesystem is only consulted
  # in non-production environments.
  configured = Rails.env.production? || File.exist?(Rails.root.join('config/initializers/dfe_analytics.rb'))

  unless configured
    warning = "Warning: DfE Analytics is not set up. Run: 'bundle exec rails generate dfe:analytics:install'"
    Rails.logger.error(warning)
    puts warning
    return
  end

  return setup_entities if defined?(ActiveRecord)

  Rails.logger.info('ActiveRecord not loaded; DfE Analytics will only track non-database requests.')
end
# Mixes the analytics modules into the models behind every allowlisted entity.
#
# Steps, in order:
#   1. Raise ActiveRecord::PendingMigrationError if the database needs
#      migrating.
#   2. Run DfE::Analytics::Fields.check!.
#   3. For each entity from `entities_for_analytics`, include
#      DfE::Analytics::TransactionChanges and DfE::Analytics::Entities into
#      its models.
#
# Pending migrations and any other ActiveRecord::ActiveRecordError are logged
# and swallowed by the method-level rescue clauses, so this method does not
# propagate those errors to the caller.
def self.setup_entities
# Rails versions above 7.1 check migrations through a connection yielded by
# `with_connection`; other versions use `ActiveRecord::Base.connection`.
if Rails.version.to_f > 7.1
ActiveRecord::Base.with_connection do |connection|
raise ActiveRecord::PendingMigrationError if connection.pool.migration_context.needs_migration?
end
elsif ActiveRecord::Base.connection.migration_context.needs_migration?
raise ActiveRecord::PendingMigrationError
end
DfE::Analytics::Fields.check!
entities_for_analytics.each do |entity|
models_for_entity(entity).each do |m|
m.include(DfE::Analytics::TransactionChanges)
# A model that already includes Entities only gets a deprecation warning and
# iteration continues with the entity's next model.
if m.include?(DfE::Analytics::Entities)
Rails.logger.warn("DEPRECATION WARNING: DfE::Analytics::Entities was manually included in a model (#{m.name}), but it's included automatically since v1.4. You're running v#{DfE::Analytics::VERSION}. To silence this warning, remove the include from model definitions in app/models.")
else
m.include(DfE::Analytics::Entities)
# Stops processing this entity's remaining models once Entities has been
# freshly included in one of them.
# NOTE(review): presumably so only one model per table emits events - confirm.
break
end
end
end
# The more specific PendingMigrationError is handled before the broader
# ActiveRecordError clause.
rescue ActiveRecord::PendingMigrationError
Rails.logger.error('Database requires migration; DfE Analytics not initialized')
rescue ActiveRecord::ActiveRecordError
Rails.logger.error('No database connection; DfE Analytics not initialized')
end
# Reports whether analytics is switched on by invoking the configured
# `enable_analytics` callable with no arguments and returning its result.
def self.enabled?
  switch = config.enable_analytics
  switch.call
end
# Returns the `:analytics` application configuration as loaded by
# `Rails.application.config_for`. Its keys are used as the list of entities
# to track (see `entities_for_analytics`) and its values as the exportable
# fields per table (see `extract_model_attributes`).
# No rescue here: any error raised by `config_for` propagates to the caller.
def self.allowlist
Rails.application.config_for(:analytics)
end
# Returns the `:analytics_hidden_pii` application configuration as loaded by
# `Rails.application.config_for`.
#
# If `config_for` raises a RuntimeError, the method falls back to
# `{ 'shared' => {} }` instead of propagating the error.
# NOTE(review): presumably this covers the configuration file being absent -
# confirm against the Rails `config_for` behaviour.
def self.hidden_pii
Rails.application.config_for(:analytics_hidden_pii)
rescue RuntimeError
{ 'shared' => {} }
end
# Returns the `:analytics_blocklist` application configuration as loaded by
# `Rails.application.config_for`.
# No rescue here: any error raised by `config_for` propagates to the caller.
def self.blocklist
Rails.application.config_for(:analytics_blocklist)
end
# Returns the `:analytics_custom_events` application configuration as loaded
# by `Rails.application.config_for`.
#
# If `config_for` raises a RuntimeError, an empty array is returned instead.
def self.custom_events
Rails.application.config_for(:analytics_custom_events)
rescue RuntimeError
[]
end
# Returns the `:analytics_event_debug` application configuration as loaded by
# `Rails.application.config_for`.
#
# If `config_for` raises a RuntimeError, an empty hash is returned instead.
# `event_debug_enabled?` reads the `:event_filters` key of this value.
def self.event_debug_filters
Rails.application.config_for(:analytics_event_debug)
rescue RuntimeError
{}
end
# Returns the configured `environment` setting. `configure` defaults it to
# the RAILS_ENV environment variable, or 'development' when that is unset.
def self.environment
config.environment
end
# Returns the configured `log_only` setting. `configure` defaults it to false
# when unset.
def self.log_only?
config.log_only
end
# Reports whether any event debug filters are configured.
#
# Returns the result of `any?` on the `:event_filters` entry of
# `event_debug_filters`, or nil when that entry is absent.
def self.event_debug_enabled?
  configured_filters = event_debug_filters[:event_filters]
  configured_filters&.any?
end
# Returns the configured `async` setting.
def self.async?
config.async
end
# Returns the keys of the allowlist. `setup_entities` iterates this list and
# resolves each key to its models through `models_for_entity`.
def self.entities_for_analytics
allowlist.keys
end
# Returns every table name known to `entity_model_mapping`, converted to
# symbols.
def self.all_entities_in_application
  entity_model_mapping.each_key.map(&:to_sym)
end
# Looks up the models mapped to an entity.
#
# The entity is converted to a string before the lookup, so symbols and
# strings are both accepted. Uses `fetch`, so an entity missing from
# `entity_model_mapping` raises KeyError.
def self.models_for_entity(entity)
  table_name = entity.to_s
  entity_model_mapping.fetch(table_name)
end
# Splits a model's attributes into exportable data and hidden PII.
#
# Only attributes allowlisted for the model's table are considered. Those
# that are also listed as hidden PII for that table go under :hidden_data;
# the remaining allowlisted ones go under :data. A key is omitted entirely
# when it would be empty.
#
# @param model an object responding to `attributes` and whose class responds
#   to `table_name`
# @param attributes [Hash, nil] attributes to filter; defaults to
#   `model.attributes`. Keys are matched as strings.
# @return [Hash] with optional :data and :hidden_data keys
def self.extract_model_attributes(model, attributes = nil)
  # With no explicit list, consider every attribute of the model.
  attributes ||= model.attributes
  table = model.class.table_name.to_sym

  allowlisted = (allowlist[table].presence || []).map(&:to_sym)
  pii_fields = (hidden_pii[table].presence || []).map(&:to_sym)

  # Hidden PII is exported separately, so it is removed from the visible set.
  hidden_keys = allowlisted & pii_fields
  visible_keys = allowlisted - hidden_keys

  visible = attributes.slice(*visible_keys.map(&:to_s))
  hidden = attributes.slice(*hidden_keys.map(&:to_s))

  extracted = {}
  extracted[:data] = visible if visible.any?
  extracted[:hidden_data] = hidden if hidden.any?
  extracted
end
# Returns the hex-encoded SHA-256 digest of the value's string form.
# The Google SQL equivalent is TO_HEX(SHA256(value)).
def self.anonymise(value)
  plain_text = value.to_s
  Digest::SHA256.hexdigest(plain_text)
end
# Builds (once) and returns a hash of table name => models backed by that
# table.
#
# ActiveRecord::Base.descendants collects every model in the application,
# including the internal models Rails uses to represent
# has_and_belongs_to_many relationships that have no model of their own.
# Grouping by table_name maps them back to dfe-analytics "entities".
# Abstract classes, models rejected by `excluded_models_proc` and Rails'
# own bookkeeping tables are left out.
def self.entity_model_mapping
  return @entity_model_mapping if @entity_model_mapping

  # Gems like devise add helper methods to controllers via the routes file.
  # With Rails.configuration.eager_load = true (the default in production,
  # not in development) routes are loaded before controllers, but a direct
  # Rails.application.eager_load! call does not do that. To avoid this
  # conflict with devise and possibly other gems/engines, load the routes
  # first unless configuration.eager_load is set.
  Rails.application.reload_routes! unless Rails.configuration.eager_load
  Rails.application.eager_load!

  internal_tables = %w[ar_internal_metadata schema_migrations]
  concrete_models = ActiveRecord::Base.descendants.reject(&:abstract_class?)
  tracked_models = concrete_models.reject(&DfE::Analytics.config.excluded_models_proc)

  @entity_model_mapping = tracked_models.group_by(&:table_name).except(*internal_tables)
end
private_class_method :entity_model_mapping
# Returns the identifier for a user by passing it to the configured
# `user_identifier` callable.
def self.user_identifier(user)
  identify = config.user_identifier
  identify.call(user)
end
# Asks the configured `rack_page_cached` callable whether the request
# described by the given Rack env was served from a page cache, and returns
# its result.
def self.rack_page_cached?(rack_env)
  cache_check = config.rack_page_cached
  cache_check.call(rack_env)
end
# Returns the configured `entity_table_checks_enabled` setting. `configure`
# defaults it to false when unset.
def self.entity_table_checks_enabled?
config.entity_table_checks_enabled
end
# Parses the configured BigQuery maintenance window.
#
# The setting is expected to look like
# 'DD-MM-YYYY HH:MM..DD-MM-YYYY HH:MM'. Both halves are parsed with
# DateTime.strptime and then converted through Time.zone.parse.
#
# Returns [nil, nil] (never raises for bad input) when:
#   * no window is configured;
#   * the '..' separator or either half is missing (logged as an error) -
#     without this guard DateTime.strptime would receive nil and raise a
#     TypeError, which the ArgumentError rescue does not cover;
#   * either half does not match the expected format (logged as an error);
#   * the start is after the end (logged as a warning).
#
# @return [Array] [start_time, end_time], or [nil, nil]
def self.parse_maintenance_window
  window = config.bigquery_maintenance_window
  return [nil, nil] unless window

  start_str, end_str = window.split('..', 2).map(&:strip)

  if start_str.nil? || end_str.nil?
    Rails.logger.error(
      'DfE::Analytics: Unexpected error in maintenance window configuration: ' \
      "expected 'DD-MM-YYYY HH:MM..DD-MM-YYYY HH:MM'"
    )
    return [nil, nil]
  end

  parsed_start_time = DateTime.strptime(start_str, '%d-%m-%Y %H:%M')
  parsed_end_time = DateTime.strptime(end_str, '%d-%m-%Y %H:%M')

  start_time = Time.zone.parse(parsed_start_time.to_s)
  end_time = Time.zone.parse(parsed_end_time.to_s)

  if start_time > end_time
    Rails.logger.warn('Start time is after end time in maintenance window configuration')
    return [nil, nil]
  end

  [start_time, end_time]
rescue ArgumentError => e
  Rails.logger.error("DfE::Analytics: Unexpected error in maintenance window configuration: #{e.message}")
  [nil, nil]
end
# Reports whether the current time (Time.zone.now) falls inside the parsed
# maintenance window, bounds included. Returns false when no usable window
# is available from `parse_maintenance_window`.
def self.within_maintenance_window?
  window_start, window_end = parse_maintenance_window
  return Time.zone.now.between?(window_start, window_end) if window_start && window_end

  false
end
# Computes a time after the maintenance window at which to reschedule work.
#
# The result is the window's end plus the time elapsed between the window's
# start and now (Time.zone.now). Returns nil when no usable window is
# available from `parse_maintenance_window`.
def self.next_scheduled_time_after_maintenance_window
  window_start, window_end = parse_maintenance_window
  return unless window_start && window_end

  elapsed = Time.zone.now - window_start
  window_end + elapsed.seconds
end
end
end