Skip to content

Commit 46fc0a9

Browse files
authored
Merge pull request #1206 from ctti-clinicaltrials/fix/outcome-result-group-id
Fix Outcome Result Groups IDs
2 parents 8de5299 + 9a792a5 commit 46fc0a9

File tree

12 files changed

+142
-72
lines changed

12 files changed

+142
-72
lines changed

app/models/calculated_value.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ class CalculatedValue < ActiveRecord::Base
22
belongs_to :study, :foreign_key => 'nct_id'
33

44
def self.populate_for(nct_ids)
5+
Rails.logger.info "Calculating values for #{nct_ids.size} studies"
56
return if nct_ids.empty?
67
@nct_ids = nct_ids.freeze
78
@calculations = initialize_calculations

app/models/outcome.rb

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
class Outcome < StudyRelationship
2-
has_many :outcome_counts, inverse_of: :outcome
3-
has_many :outcome_analyses, inverse_of: :outcome
4-
has_many :outcome_measurements, inverse_of: :outcome
2+
has_many :outcome_counts
3+
has_many :outcome_analyses
4+
has_many :outcome_measurements
5+
has_many :result_groups
56

67
add_mapping do
78
{
89
table: :outcomes,
910
root: [:resultsSection, :outcomeMeasuresModule, :outcomeMeasures],
10-
requires: :result_groups,
1111
columns: [
1212
{ name: :outcome_type, value: :type },
1313
{ name: :title, value: :title },
@@ -22,13 +22,23 @@ class Outcome < StudyRelationship
2222
{ name: :param_type, value: :paramType }
2323
],
2424
children: [
25+
{
26+
table: :result_groups,
27+
root: [:groups],
28+
columns: [
29+
{ name: :ctgov_group_code, value: :id },
30+
{ name: :result_type, value: 'Outcome' },
31+
{ name: :title, value: :title },
32+
{ name: :description, value: :description }
33+
]
34+
},
2535
{
2636
table: :outcome_measurements,
27-
root: nil,
37+
# TODO: after removing dup values can update root to [:classes] to reduce object size
38+
root: nil,
2839
flatten: [:classes, :categories, :measurements],
2940
columns: [
30-
{ name: :outcome_id, value: nil },
31-
{ name: :result_group_id, value: reference(:result_groups)[:groupId, 'Outcome'] },
41+
# result_group_id is set by ResultGroup.set_outcome_results_group_ids
3242
{ name: :ctgov_group_code, value: :groupId },
3343
{ name: :classification, value: [:$parent, :$parent, :title] },
3444
{ name: :category, value: [:$parent, :title] },
@@ -52,12 +62,10 @@ class Outcome < StudyRelationship
5262
},
5363
{
5464
table: :outcome_counts,
55-
root: nil,
56-
requires: :result_groups, # do I need this since the parent has it?
57-
flatten: [:denoms, :counts],
65+
root: [:denoms],
66+
flatten: [:counts],
5867
columns: [
59-
{ name: :outcome_id, value: nil },
60-
{ name: :result_group_id, value: reference(:result_groups)[:groupId, 'Outcome'] },
68+
# result_group_id is set by ResultGroup.handle_outcome_result_groups_ids
6169
{ name: :ctgov_group_code, value: [:groupId] },
6270
{ name: :scope, value: 'Measure' },
6371
{ name: :units, value: [:$parent, :units] },
@@ -98,9 +106,8 @@ class Outcome < StudyRelationship
98106
table: :outcome_analysis_groups,
99107
root: [:groupIds],
100108
columns: [
101-
{ name: :outcome_analysis_id, value: nil },
102-
{ name: :result_group_id, value: reference(:result_groups)[nil, 'Outcome'] },
103-
{ name: :ctgov_group_code, value: nil }
109+
# result_group_id is set by ResultGroup.handle_outcome_result_groups_ids
110+
{ name: :ctgov_group_code, value: nil } # will be single ctgov_group_code from [:groupIds]
104111
]
105112
}
106113
]

app/models/outcome_analysis.rb

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
class OutcomeAnalysis < StudyRelationship
2-
belongs_to :outcome, inverse_of: :outcome_analyses
3-
has_many :outcome_analysis_groups, inverse_of: :outcome_analysis
4-
has_many :result_groups, :through => :outcome_analysis_groups
2+
belongs_to :outcome
3+
has_many :outcome_analysis_groups
54
end
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
class OutcomeAnalysisGroup < StudyRelationship
2-
belongs_to :outcome_analysis, inverse_of: :outcome_analysis_groups
3-
belongs_to :result_group, inverse_of: :outcome_analysis_groups
2+
belongs_to :outcome_analysis#, inverse_of: :outcome_analysis_groups
3+
belongs_to :result_group#, inverse_of: :outcome_analysis_groups
44

55
end

app/models/result_group.rb

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,16 @@
11
class ResultGroup < StudyRelationship
2-
3-
has_many :reported_events
4-
has_many :milestones
5-
has_many :drop_withdrawals
6-
has_many :baseline_counts
7-
has_many :baseline_measures
8-
has_many :outcome_counts
9-
has_many :outcome_measurements
10-
has_many :outcome_analysis_groups, inverse_of: :result_group
11-
has_many :outcome_analyses, :through => :outcome_analysis_groups
2+
belongs_to :outcome, optional: true
123

134
add_mapping do
145
[
156
{
167
table: :result_groups,
178
root: [:resultsSection, :baselineCharacteristicsModule, :groups],
18-
index: [:ctgov_group_code, :result_type],
19-
unique: true,
20-
columns: [
21-
{ name: :ctgov_group_code, value: :id },
22-
{ name: :result_type, value: 'Baseline' },
23-
{ name: :title, value: :title },
24-
{ name: :description, value: :description },
25-
]
26-
},
27-
{
28-
table: :result_groups,
29-
root: [:resultsSection, :outcomeMeasuresModule, :outcomeMeasures],
30-
flatten: [:groups],
319
index: [:ctgov_group_code, :result_type],
3210
unique: true,
3311
columns: [
3412
{ name: :ctgov_group_code, value: :id },
35-
{ name: :result_type, value: 'Outcome' },
13+
{ name: :result_type, value: 'Baseline' },
3614
{ name: :title, value: :title },
3715
{ name: :description, value: :description },
3816
]
@@ -53,6 +31,8 @@ class ResultGroup < StudyRelationship
5331
table: :result_groups,
5432
root: [:resultsSection, :adverseEventsModule, :eventGroups],
5533
index: [:ctgov_group_code, :result_type],
34+
# outcomes should be loaded first to avoid double loading of reported events
35+
requires: :outcomes, # TODO: review - this shouldn't be necessary
5636
unique: true,
5737
columns: [
5838
{ name: :ctgov_group_code, value: :id },
@@ -63,4 +43,61 @@ class ResultGroup < StudyRelationship
6343
}
6444
]
6545
end
46+
47+
def self.handle_outcome_result_groups_ids(nct_ids)
48+
Rails.logger.info "Handling Results Group IDs for Outcome Models"
49+
result_groups = fetch_outcome_groups_for(nct_ids)
50+
set_results_group_ids_for(OutcomeCount, nct_ids, result_groups)
51+
set_results_group_ids_for(OutcomeMeasurement, nct_ids, result_groups)
52+
set_outcome_analysis_group_ids(nct_ids, result_groups)
53+
end
54+
55+
56+
private
57+
58+
# TODO: add test for private methods
59+
60+
def self.set_outcome_analysis_group_ids(nct_ids, result_groups)
61+
Rails.logger.info "Setting Outcome Analysis Group IDs"
62+
analyses = OutcomeAnalysis.where(nct_id: nct_ids).pluck(:id, :outcome_id).to_h
63+
groups = OutcomeAnalysisGroup.where(nct_id: nct_ids)
64+
65+
updates = groups.map do | group |
66+
outcome_id = analyses[group.outcome_analysis_id]
67+
next unless outcome_id
68+
69+
key = [group.nct_id, group.ctgov_group_code, outcome_id]
70+
result_group_id = result_groups[key]&.id
71+
{ id: group.id, result_group_id: result_group_id } if result_group_id
72+
end.compact
73+
74+
bulk_update(OutcomeAnalysisGroup, updates)
75+
end
76+
77+
def self.set_results_group_ids_for(model, nct_ids, result_groups)
78+
Rails.logger.info "Setting Result Group IDs for #{model}"
79+
80+
records = model.where(nct_id: nct_ids).select(:id, :nct_id, :ctgov_group_code, :outcome_id)
81+
updates = records.map do |record|
82+
key = [record.nct_id, record.ctgov_group_code, record.outcome_id]
83+
result_group_id = result_groups[key]&.id
84+
{ id: record.id, result_group_id: result_group_id } if result_group_id
85+
end.compact
86+
87+
bulk_update(model, updates)
88+
end
89+
90+
91+
# TODO: consider adding index for nct_id and result_type
92+
def self.fetch_outcome_groups_for(nct_ids)
93+
where(nct_id: nct_ids, result_type: "Outcome")
94+
.select(:id, :nct_id, :ctgov_group_code, :outcome_id)
95+
.index_by do |group|
96+
[group.nct_id, group.ctgov_group_code, group.outcome_id]
97+
end
98+
end
99+
100+
def self.bulk_update(model, updates)
101+
model.import updates, on_duplicate_key_update: { conflict_target: [:id], columns: [:result_group_id] }
102+
end
66103
end

app/models/study_json_record/worker.rb

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def save_children(parents, indent=" ")
5151
klass = parents.first.class
5252
return if klass == Study
5353

54+
5455
klass.reflect_on_all_associations(:has_many).each do |association|
5556
# update the nct_id and parent_id of the children
5657
collection = []
@@ -59,6 +60,12 @@ def save_children(parents, indent=" ")
5960
next unless inverse_name
6061

6162
parent = child.send(inverse_name)
63+
# keeping this temprorary fix until we find a better solution
64+
if parent.nil?
65+
puts "Skipping child because parent is nil"
66+
puts "Parrent: #{inverse_name}, child: #{association.name}"
67+
next
68+
end
6269
child.nct_id = parent.nct_id
6370
child[association.foreign_key] = parent.id
6471
collection << child
@@ -90,11 +97,12 @@ def process_study(nct_id)
9097
def process(count = 1, records = nil)
9198
# load records
9299
records = StudyJsonRecord.version_2.needs_processing.limit(count) if records.nil?
93-
94-
Rails.logger.debug { "records: #{records.count}" }
100+
return if records.empty?
101+
nct_ids = records.map(&:nct_id)
102+
Rails.logger.debug { "records: #{nct_ids.count}" }
95103

96-
puts "worker is about to process #{records.count} records".green unless Rails.env.test?
97-
remove_study_data(records.map(&:nct_id))
104+
puts "worker is about to process #{nct_ids.count} records".green unless Rails.env.test?
105+
remove_study_data(nct_ids)
98106

99107
@collections = Hash.new { |h, k| h[k] = [] }
100108
@index = Hash.new { |h, k| h[k] = {} }
@@ -103,11 +111,11 @@ def process(count = 1, records = nil)
103111
StudyRelationship.sorted_mapping.each do |mapping| # process each mapping instructions
104112
process_mapping(mapping, records)
105113
end
106-
107-
# recalculate values for processed records
108-
CalculatedValue.populate_for(records.pluck(:nct_id))
114+
115+
ResultGroup.handle_outcome_result_groups_ids(nct_ids)
116+
CalculatedValue.populate_for(nct_ids)
109117
# mark study records as saved
110-
StudyJsonRecord.version_2.where(nct_id: records.map(&:nct_id)).update_all(saved_study_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations
118+
StudyJsonRecord.version_2.where(nct_id: nct_ids).update_all(saved_study_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations
111119
end
112120

113121
# private
@@ -119,7 +127,6 @@ def prepare_children(parent, content, children)
119127
collection = [] # this array will collect all the models to be imported
120128
model = mapping[:table].to_s.classify.constantize # get the model from the table name
121129
root = mapping[:root].map(&:to_s) if mapping[:root] # normalize root path to array of strings
122-
123130
mapping_root = root ? content.dig(*root) : content
124131
next if mapping_root.nil? # skip if no root found
125132

@@ -254,13 +261,15 @@ def process_mapping(mapping, records)
254261
print "\r #{mapping[:table]} - #{collection.count}" unless ENV['RAILS_ENV'] == 'test'
255262
model.import(collection)
256263
puts "\r#{mapping[:table]} - #{collection.count}" unless ENV['RAILS_ENV'] == 'test'
264+
257265
if mapping[:index]
258266
index = [:nct_id] + mapping[:index]
259267
collection.each do |row|
260268
row_index = index.map { |i| row.send(i) }
261269
@index[mapping[:table]][row_index] = row.id
262270
end
263271
end
272+
264273
save_children(collection)
265274
rescue
266275
raise "Error processing #{mapping[:table]}"

db/foreign_keys.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,5 +100,11 @@
100100
"parent_table": "result_groups",
101101
"child_column": "result_group_id",
102102
"parent_column": "id"
103+
},
104+
{
105+
"child_table": "result_groups",
106+
"parent_table": "outcomes",
107+
"child_column": "outcome_id",
108+
"parent_column": "id"
103109
}
104110
]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
class AddOutomeIdToResultGroups < ActiveRecord::Migration[6.0]
2+
def change
3+
add_column :result_groups, :outcome_id, :integer
4+
end
5+
end

lib/tasks/load.rake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@ namespace :db do
1010

1111
desc 'process study json records'
1212
task :import_study, [:nct_id] => :environment do |t, args|
13+
dbmgr = Util::DbManager.new
14+
dbmgr.remove_constraints
1315
worker = StudyJsonRecord::Worker.new
1416
records = StudyJsonRecord.where(nct_id: args[:nct_id], version: '2')
1517
worker.process(1, records)
18+
dbmgr.add_constraints
1619
end
1720

1821
desc 'process study json records'

spec/models/baseline_count_spec.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ def expected_result_group_data
5858
'ctgov_group_code' => record['id'],
5959
'result_type' => 'Baseline',
6060
'title' => record['title'],
61-
'description' => record['description']
61+
'description' => record['description'],
62+
'outcome_id' => nil
6263
}
6364
end
6465
end

0 commit comments

Comments
 (0)