Skip to content

Commit ab2be06

Browse files
committed
Merge branch 'master' of github.com:ctti-clinicaltrials/aact
2 parents 466cd58 + 49976a3 commit ab2be06

File tree

6 files changed

+76
-45
lines changed

6 files changed

+76
-45
lines changed

app/models/search_result.rb

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ def self.study_values(study)
3232
study_nct_id = study.nct_id
3333
id_values = study.id_information.pluck(:id_value).join('|')
3434
sponsors = study.sponsors
35-
grouped = sponsors.group_by(&:lead_or_collaborator)
36-
lead = grouped['lead'].first
37-
collaborators = grouped['collaborator']
38-
collab_names = collaborators.map{|collab| "#{collab.name}[#{collab.agency_class}]"}.join('|') if collaborators
35+
unless sponsors.blank?
36+
grouped = sponsors.group_by(&:lead_or_collaborator)
37+
lead = grouped['lead'].first
38+
collaborators = grouped['collaborator']
39+
collab_names = collaborators.map{|collab| "#{collab.name}[#{collab.agency_class}]"}.join('|') if collaborators
40+
end
3941
interventions = study.interventions
4042
intervention_name_type = []
4143
intervention_details = []
@@ -105,9 +107,9 @@ def self.study_values(study)
105107
study.why_stopped, #why_stopped
106108
hcq_query(study) ? 'Yes' : 'No', #hcq
107109
study.has_dmc ? 'Yes' : 'No', #has_dmc
108-
sponsors.pluck(:agency_class).uniq.join('|'), #funded_bys
109-
sponsors.pluck(:name).join('|'), #sponsor_collaborators
110-
lead ? "#{lead.name}[#{lead.agency_class}]" : nil, #lead_sponsor
110+
sponsors.blank? ? '' : sponsors.pluck(:agency_class).uniq.join('|'), #funded_bys
111+
sponsors.blank? ? '' : sponsors.pluck(:name).join('|'), #sponsor_collaborators
112+
lead.blank? ? nil : "#{lead.name}[#{lead.agency_class}]", #lead_sponsor
111113
collab_names, #collaborators
112114
study.study_type, #study_type
113115
study.phase.try(:split, '/').try(:join, '|'), #phases

app/models/study_json_record.rb

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,27 +73,47 @@ def self.download_all_studies(url='https://ClinicalTrials.gov/AllAPIJSON.zip')
7373
end
7474

7575
# Reloads every study from the bulk JSON archive returned by
# download_all_studies.
#
# Steps, in order:
#   1. download the archive and collect the nct_ids of the existing
#      StudyJsonRecord rows;
#   2. call remove_indexes_and_constraints, then clear_out_data_for(nct_ids);
#   3. walk every entry of the zip, skipping entries whose name matches
#      /contents/i, parse it as JSON and hand its 'FullStudy' node to
#      save_single_study;
#   4. call add_indexes_and_constraints once the archive has been processed.
#
# A failure on one entry is logged (ErrorLog + Airbrake) and the loop moves on
# to the next entry. Progress (remaining entries, per-study time, running
# total, Study.count) is printed to stdout after each saved study.
def self.full
  total_time = 0
  study_download = download_all_studies
  nct_ids = StudyJsonRecord.all.map(&:nct_id)
  remove_indexes_and_constraints
  clear_out_data_for(nct_ids)

  Zip::File.open(study_download.path) do |unzipped_folders|
    # Counts down as studies are saved; only used for the progress line.
    @count_down = unzipped_folders.size
    unzipped_folders.each do |file|
      begin
        unless file.name =~ /contents/i
          start = Time.now

          contents = file.get_input_stream.read
          json = JSON.parse(contents)
          study = json['FullStudy']
          save_single_study(study)

          # Elapsed time is "now minus start"; the reverse order yields a
          # negative duration and a negative running total.
          duration = Time.now - start
          total_time += duration

          puts "#{@count_down -= 1}, took #{htime(duration)}, total time so far #{htime(total_time)}, Study Count: #{Study.count}"
        end
      # StandardError (not Exception) so that Interrupt, SystemExit and
      # NoMemoryError still propagate and the load can be stopped.
      rescue StandardError => error
        msg="#{error.message} (#{error.class} #{error.backtrace}"
        ErrorLog.error(msg)
        Airbrake.notify(error)
      end
    end
  end
  add_indexes_and_constraints
end
109+
110+
# Formats a duration given in seconds as "H:MM:SS".
#
# The value is truncated to whole seconds with to_i. Hours are not padded and
# are not capped at 24; minutes and seconds are zero-padded to two digits.
# A negative duration is rendered as the magnitude with a leading "-"
# (e.g. -5 => "-0:00:05") instead of letting floor division borrow from the
# hours column.
#
# @param seconds [Numeric] duration in seconds
# @return [String] the formatted duration
def self.htime(seconds)
  whole = seconds.to_i
  sign = whole < 0 ? '-' : ''
  total_minutes, secs = whole.abs.divmod(60)
  hours, minutes = total_minutes.divmod(60)
  "#{sign}#{hours}:#{'%02i' % minutes}:#{'%02i' % secs}"
end
98118

99119
def self.incremental
@@ -1648,7 +1668,9 @@ def save_design_groups(design_groups)
16481668
# Fills in :result_group_id on every row of +group+ and bulk-imports the rows
# through the model named by +name_of_model+.
#
# Each row's :ctgov_group_code is looked up in @study_result_groups and the
# id of the matching entry is stored on the row; a code with no entry leaves
# :result_group_id as nil. The rows are mutated in place.
#
# @param group [Array<Hash>, nil] rows to import; nothing happens when nil
# @param name_of_model [String] class name resolved with safe_constantize
# @return the result of the model's import call, or nil when group is nil
def save_with_result_group(group, name_of_model='BaselineMeasurement')
  return unless group

  # each, not map: the block is run only for its side effect on the rows.
  group.each do |row|
    row[:result_group_id] = @study_result_groups[row[:ctgov_group_code]].try(:id)
  end
  name_of_model.safe_constantize.import(group, validate: false)
end
16541676

app/models/study_search.rb

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,14 @@ def load_update(days_back=2)
5959

6060
# Runs load_update(days_back) on every record returned by +all+, printing a
# progress line per query group.
#
# Errors are rescued per query and reported to Airbrake, so a failure in one
# group no longer prevents the remaining groups from running.
#
# @param days_back [Integer] passed through to each query's load_update
def self.execute(days_back=2)
  all.each do |query|
    begin
      print "running query group: #{query.grouping}..."
      query.load_update(days_back)
      puts "group is done"
    rescue => e
      Airbrake.notify(e)
    end
  end
end
6872

app/models/util/db_manager.rb

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,14 @@ def dump_database
4848
# 3. restore the db from file
4949
# 4. verify the study count (permissions are not granted again to prevent bad data from being used)
5050
# 5. grant connection permissions again
51-
def restore_database(schema_type, connection, filename)
52-
schema = 'ctgov'
51+
def restore_database(schema, connection, filename)
52+
if schema =~ /beta/
53+
schema = 'ctgov_beta'
54+
elsif schema =~ /archive/
55+
schema = 'ctgov_archive'
56+
else
57+
schema = 'ctgov'
58+
end
5359
config = connection.instance_variable_get('@config')
5460
host, port, username, database, password = config[:host], config[:port], config[:username], config[:database], config[:password]
5561

@@ -209,7 +215,7 @@ def add_constraints
209215
migration.add_foreign_key child_table, parent_table, column: child_column, primary_key: parent_column, name: "#{child_table}_#{child_column}_fkey"
210216
rescue => e
211217
log(e)
212-
event.add_problem("#{Time.zone.now}: #{e}")
218+
event.add_problem("#{Time.zone.now}: #{e}") if event
213219
end
214220
}
215221
end
@@ -221,15 +227,15 @@ def remove_indexes_and_constraints
221227
con.remove_foreign_key table_name, column: :nct_id if con.foreign_keys(table_name).map(&:column).include?("nct_id")
222228
rescue => e
223229
log(e)
224-
event.add_problem("#{Time.zone.now}: #{e}")
230+
event.add_problem("#{Time.zone.now}: #{e}") if event
225231
end
226232

227233
con.indexes(table_name).each{|index|
228234
begin
229235
migration.remove_index(index.table, index.columns) if !should_keep_index?(index) and migration.index_exists?(index.table, index.columns)
230236
rescue => e
231237
log(e)
232-
event.add_problem("#{Time.zone.now}: #{e}")
238+
event.add_problem("#{Time.zone.now}: #{e}") if event
233239
end
234240
}
235241
}
@@ -241,7 +247,7 @@ def remove_indexes_and_constraints
241247
con.remove_foreign_key table, column: column if con.foreign_keys(table).map(&:column).include?(column)
242248
rescue => e
243249
log(e)
244-
event.add_problem("#{Time.zone.now}: #{e}")
250+
event.add_problem("#{Time.zone.now}: #{e}") if event
245251
end
246252
}
247253
end
@@ -253,7 +259,7 @@ def remove_constrains
253259
con.remove_foreign_key table_name, column: :nct_id if con.foreign_keys(table_name).map(&:column).include?("nct_id")
254260
rescue => e
255261
log(e)
256-
event.add_problem("#{Time.zone.now}: #{e}")
262+
event.add_problem("#{Time.zone.now}: #{e}") if event
257263
end
258264
}
259265

@@ -264,7 +270,7 @@ def remove_constrains
264270
con.remove_foreign_key table, column: column if con.foreign_keys(table).map(&:column).include?(column)
265271
rescue => e
266272
log(e)
267-
event.add_problem("#{Time.zone.now}: #{e}")
273+
event.add_problem("#{Time.zone.now}: #{e}") if event
268274
end
269275
}
270276
end
@@ -284,6 +290,7 @@ def self.loadable_tables
284290
study_json_records
285291
use_cases
286292
use_case_attachments
293+
verifiers
287294
)
288295
table_names=con.tables.reject{|table|blacklist.include?(table)}
289296
end

app/models/util/table_exporter.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ def initialize(tables=[],schema='')
66
@schema = schema
77
@temp_dir = "#{Util::FileManager.new.dump_directory}/export"
88
@zipfile_name = "#{@temp_dir}/#{Time.zone.now.strftime('%Y%m%d')}_export.zip"
9-
@connection = ActiveRecord::Base.connection.raw_connection
109
@table_names = tables
1110
create_temp_dir_if_none_exists!
1211
end
@@ -51,8 +50,9 @@ def create_tempfiles(delimiter)
5150

5251
def export_table_to_csv(file, file_name, path, delimiter)
5352
table = File.basename(file_name, delimiter == ',' ? '.csv' : '.txt')
54-
@connection.copy_data("copy #{table} to STDOUT with delimiter '#{delimiter}' csv header") do
55-
while row = @connection.get_copy_data
53+
connection = ActiveRecord::Base.connection.raw_connection
54+
connection.copy_data("copy #{table} to STDOUT with delimiter '#{delimiter}' csv header") do
55+
while row = connection.get_copy_data
5656
# convert all \n to ~. Then when you write to the file, convert last ~ back to \n
5757
# to prevent it from concatenating all rows into one big long string
5858
fixed_row=row.gsub(/\"\"/, '').gsub(/\n\s/, '~').gsub(/\n/, '~')

app/models/util/updater.rb

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ def run
254254
end
255255
finalize_load if status != false
256256
rescue StandardError => e
257+
Airbrake.notify(e)
257258
begin
258259
msg = "#{e.message} (#{e.class} #{e.backtrace}"
259260
log("#{@load_event.event_type} load failed in run: #{msg}")
@@ -269,17 +270,18 @@ def run
269270
end
270271

271272
# Runs a full load.
#
# Steps, in order:
#   1. create a Verifier from the 'StudyStatistics' > 'ElmtDefs' > 'Study'
#      node of ClinicalTrialsApi.study_statistics;
#   2. run StudyJsonRecord.full and log how long it took;
#   3. call verify and write_data_to_file on the verifier with +schema+;
#   4. populate MeshTerm and MeshHeading from file.
#
# NOTE(review): time_ago_in_words and schema are resolved outside this method;
# confirm both are available on this class.
def full
  start_time = Time.zone.now
  log("storing study statistics data from ClinicalTrials.gov...")
  verifier = Verifier.create(source: ClinicalTrialsApi.study_statistics.dig('StudyStatistics', "ElmtDefs", "Study"))

  log("begin full load, Start Time: #{start_time}...")
  StudyJsonRecord.full
  log("took #{time_ago_in_words(start_time)}")

  # Log text corrected from the misspelled "verififing".
  log("verifying study statistics match the aact database...")
  verifier.verify(schema)
  verifier.write_data_to_file(schema)

  MeshTerm.populate_from_file
  MeshHeading.populate_from_file
end
@@ -318,7 +320,6 @@ def finalize_load
318320
log('finalizing load...')
319321

320322
load_event.log('add indexes and constraints..')
321-
add_indexes_and_constraints if params[:event_type] == 'full'
322323

323324
load_event.log('execute study search...')
324325
days_back = (Date.today - Date.parse('2013-01-01')).to_i if load_event.event_type == 'full'
@@ -331,9 +332,6 @@ def finalize_load
331332
set_downcase_terms
332333
end
333334

334-
load_event.log('populate admin tables...')
335-
# populate_admin_tables
336-
337335
load_event.log('run sanity checks...')
338336
load_event.run_sanity_checks
339337

@@ -354,8 +352,6 @@ def finalize_load
354352

355353
load_event.log('create flat files...')
356354
create_flat_files(schema)
357-
358-
# Admin::PublicAnnouncement.clear_load_message
359355
end
360356

361357
def remove_indexes_and_constraints

0 commit comments

Comments
 (0)