Skip to content

Commit 584909a

Browse files
lcjohnso and Tooyosi authored
Code Tweaks for GZ COSMOS - May 2025 (#206)
* update caesar config script
* training - only train on first frame in multi-frame subject
* prediction - only predict based on first frame for multi-frame subject
* add n_blocks support to metadata and bajor calls

---------

Co-authored-by: tooyosi <oluwatoyosi.oyegoke@gmail.com>
1 parent 90d400c commit 584909a

File tree

7 files changed

+105
-38
lines changed

7 files changed

+105
-38
lines changed

app/services/batch/prediction/export_manifest.rb

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -94,23 +94,22 @@ def create_manifest_data(panoptes_client, slices = MANIFEST_SUBJECT_SET_BATCH_SI
9494
end
9595
end.map(&:wait)
9696
subject_responses.each do |subject|
97-
# Create a data row for each image URL in the Subject
98-
# this will duplicate the subject information for each image URL
99-
subject['locations'].each_with_index do |location, frame_id|
100-
manifest_data << [
101-
location.values[0], # image_url
102-
# The subject's JSON information is stored as a string,
103-
# Yes, really - this is the format that hamlet sets up.
104-
JSON.dump(
105-
{
106-
project_id: project_id,
107-
subject_set_id: subject_set_id.to_s,
108-
subject_id: subject['id'],
109-
frame_id: frame_id.to_s
110-
}
111-
)
112-
]
113-
end
97+
# Create a data row for the first frame in the Subject
98+
location = subject['locations'].first
99+
frame_id = 0
100+
manifest_data << [
101+
location.values[0], # image_url
102+
# The subject's JSON information is stored as a string,
103+
# Yes, really - this is the format that hamlet sets up.
104+
JSON.dump(
105+
{
106+
project_id: project_id,
107+
subject_set_id: subject_set_id.to_s,
108+
subject_id: subject['id'],
109+
frame_id: frame_id.to_s
110+
}
111+
)
112+
]
114113
end
115114
ensure
116115
Faraday.default_connection.close

app/services/batch/training/create_job.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,14 @@ def training_options
3333
context.metadata['fixed_crop']
3434
end
3535

36+
n_blocks = if context.metadata.is_a?(Hash) && context.metadata['n_blocks']
37+
context.metadata['n_blocks']
38+
end
39+
3640
{
3741
workflow_name: context.extractor_name,
3842
fixed_crop: fixed_crop,
43+
n_blocks: n_blocks
3944
}.compact
4045
end
4146
end

app/services/format/training_data_csv.rb

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,18 @@ def run
3131
raise MissingLocationData, "For subject id: #{reduced_subject.id}" if reduced_subject.locations.blank?
3232

3333
# Ensure we handle multi image subjects here
34-
# include 1 line per image for use in training catalogues
35-
reduced_subject.locations.each do |location|
36-
# each location is an object containing only 1 mimetype key and an image URL
37-
image_url = location.values.first
38-
csv << [
39-
grouped_reduction.unique_id,
40-
Zoobot::Storage.container_image_path(image_url),
41-
# fetch all the reduction's saved question:answer values
42-
# ensure we add 0's to the missing column headers - Zoobot demands this!
43-
# https://zoobot.readthedocs.io/guides/training_from_scratch.html#creating-a-catalog
44-
*grouped_reduction.labels.fetch_values(*label_column_headers) { |_key| 0 }
45-
]
46-
end
34+
# assume we will only train on first image
35+
location = reduced_subject.locations.first
36+
# each location is an object containing only 1 mimetype key and an image URL
37+
image_url = location.values.first
38+
csv << [
39+
grouped_reduction.unique_id,
40+
Zoobot::Storage.container_image_path(image_url),
41+
# fetch all the reduction's saved question:answer values
42+
# ensure we add 0's to the missing column headers - Zoobot demands this!
43+
# https://zoobot.readthedocs.io/guides/training_from_scratch.html#creating-a-catalog
44+
*grouped_reduction.labels.fetch_values(*label_column_headers) { |_key| 0 }
45+
]
4746
end
4847
temp_file.rewind
4948
temp_file
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def create_workflow_subject_rules_and_effects():
9494

9595
# create the subject rule effect (send to KaDE) config
9696
effect_config = {
97-
'url': f'{KADE_ENDPOINT}/reductions/galaxy_zoo_cosmic_dawn_{subject_rule_task_key.lower()}',
97+
'url': f'{KADE_ENDPOINT}/reductions/galaxy_zoo_jwst_cosmos_{subject_rule_task_key.lower()}',
9898
'reducer_key': f'{subject_rule_task_key}_sum',
9999
'password': KADE_API_BASIC_AUTH_PASSWORD,
100100
'username': KADE_API_BASIC_AUTH_USERNAME
@@ -114,7 +114,7 @@ def create_workflow_subject_rules_and_effects():
114114
if __name__ == '__main__':
115115
"""
116116
Set up an Active Learning Loop workflow for Caesar
117-
For the Cosmic Dawn Survey dataset
117+
For the JWST COSMOS Survey dataset
118118
"""
119119
FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
120120
# set level to DEBUG to get the panoptes network traffic
@@ -161,7 +161,8 @@ def create_workflow_subject_rules_and_effects():
161161
caesar = Caesar(endpoint=caesar_endpoint)
162162

163163
# currently all extractors are 'question' type, if this changes we can add them here for task key lookup tuple
164-
GZ_DECISION_TREE_TASK_KEYS = {'T0':True,'T1':True,'T2':True,'T3':True,'T4':True,'T5':True,'T6':True,'T7':True,'T8':True,'T11':True,'T12':True,'T13':True,'T14':True,'T15':True}
164+
#GZ_DECISION_TREE_TASK_KEYS = {'T0':True,'T1':True,'T2':True,'T3':True,'T4':True,'T5':True,'T6':True,'T7':True,'T8':True,'T11':True,'T12':True,'T13':True,'T14':True,'T15':True}
165+
GZ_DECISION_TREE_TASK_KEYS = {'T0':True,'T2':True,'T3':True,'T4':True,'T5':True,'T6':True,'T7':True,'T8':True,'T11':True,'T12':True,'T19':True}
165166
# setup known count reducer keys
166167
COUNT_REDUCER_KEYS = {f'{task_key}_count': task_key for task_key in GZ_DECISION_TREE_TASK_KEYS}
167168
# setup known sum reducer keys
@@ -172,7 +173,7 @@ def create_workflow_subject_rules_and_effects():
172173
# enough information to augment the trained ML system
173174
# each new classification will send data to kade till retirement
174175
# finally - these numbers can be easily adjusted post setup in the caesar UI system, https://caesar.zooniverse.org/workflows
175-
NUM_CLASSIFICATIONS_BEFORE_SEND_TO_KADE = 3 if args.caesar_env == 'production' else 1
176+
NUM_CLASSIFICATIONS_BEFORE_SEND_TO_KADE = 5 if args.caesar_env == 'production' else 1 # was: 3 else 1
176177

177178
# look up the workflow
178179
zoo_api_workflow = Workflow.find(args.workflow_id)

lib/bajor/client.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def bajor_service_host
138138

139139
def build_opts(options, include_schema=true)
140140
raw = options.with_indifferent_access
141-
overrides = raw.symbolize_keys.slice(:workflow_name, :fixed_crop)
141+
overrides = raw.symbolize_keys.slice(:workflow_name, :fixed_crop, :n_blocks)
142142

143143
DEFAULT_OPTIONS
144144
.merge(overrides)
@@ -149,6 +149,10 @@ def build_opts(options, include_schema=true)
149149
run_opts << "--fixed-crop '#{o[:fixed_crop].to_json}'"
150150
o.delete(:fixed_crop)
151151
end
152+
if o[:n_blocks].present?
153+
run_opts << "--n-blocks #{o[:n_blocks]}"
154+
o.delete(:n_blocks)
155+
end
152156
o[:run_opts] = run_opts.join(' ') if run_opts.any?
153157
end
154158
end

spec/lib/bajor/client_spec.rb

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33
require 'bajor/client'
44
require 'rails_helper'
55

6-
def build_expected_body(manifest_url: nil, manifest_path: nil, workflow_name:, fixed_crop: nil)
6+
def build_expected_body(manifest_url: nil, manifest_path: nil, workflow_name:, fixed_crop: nil, n_blocks: nil)
77
opts = {
88
workflow_name: workflow_name
99
}
1010

1111
run_opts = []
1212
run_opts << "--schema #{workflow_name}" if manifest_path
1313
run_opts << "--fixed-crop '#{fixed_crop.to_json}'" if fixed_crop
14+
run_opts << "--n-blocks #{n_blocks}" if n_blocks
1415
opts[:run_opts] = run_opts.join(' ') unless run_opts.empty?
1516

1617
if manifest_url
@@ -154,6 +155,35 @@ def build_expected_body(manifest_url: nil, manifest_path: nil, workflow_name:, f
154155
end
155156
end
156157

158+
context 'with jswt_cosmos workflow and n_blocks' do
159+
let(:workflow_name) { 'jswt_cosmos' }
160+
let(:n_blocks) { 2 }
161+
162+
let(:expected_body) { build_expected_body(manifest_path: catalogue_manifest_path, workflow_name: workflow_name, n_blocks: n_blocks) }
163+
let(:request) do
164+
stub_request(:post, request_url)
165+
.with(
166+
body: expected_body.to_json,
167+
headers: request_headers
168+
)
169+
end
170+
171+
before do
172+
request.to_return(status: 201, body: expected_body.to_json, headers: { content_type: 'application/json' })
173+
end
174+
175+
it 'sends jswt_cosmos workflow and n_blocks settings' do
176+
bajor_client.create_training_job(
177+
catalogue_manifest_path,
178+
{ workflow_name: workflow_name, n_blocks: n_blocks }
179+
)
180+
expect(
181+
a_request(:post, request_url)
182+
.with(body: expected_body, headers: request_headers)
183+
).to have_been_made.once
184+
end
185+
end
186+
157187
context 'with jswt_cosmos workflow and no fixed crop' do
158188
let(:workflow_name) { 'jswt_cosmos' }
159189
let(:expected_body) { build_expected_body(manifest_path: catalogue_manifest_path, workflow_name: workflow_name) }
@@ -315,6 +345,36 @@ def build_expected_body(manifest_url: nil, manifest_path: nil, workflow_name:, f
315345
end
316346
end
317347

348+
context 'with jswt_cosmos workflow and n_blocks' do
349+
let(:workflow_name) { 'jswt_cosmos' }
350+
let(:n_blocks) { 2 }
351+
352+
let(:expected_body) { build_expected_body(manifest_url: manifest_url, workflow_name: workflow_name, n_blocks: n_blocks) }
353+
354+
let(:request) do
355+
stub_request(:post, request_url)
356+
.with(
357+
body: expected_body.to_json,
358+
headers: request_headers
359+
)
360+
end
361+
362+
before do
363+
request.to_return(status: 201, body: bajor_response_body.to_json, headers: { content_type: 'application/json' })
364+
end
365+
366+
it 'sends jswt_cosmos workflow and n_blocks settings' do
367+
bajor_client.create_prediction_job(
368+
manifest_url,
369+
{ workflow_name: workflow_name, n_blocks: n_blocks }
370+
)
371+
expect(
372+
a_request(:post, request_url)
373+
.with(body: expected_body.to_json, headers: request_headers)
374+
).to have_been_made.once
375+
end
376+
end
377+
318378
context 'with jswt_cosmos workflow and no fixed crop' do
319379
let(:workflow_name) { 'jswt_cosmos' }
320380
let(:expected_body) { build_expected_body(manifest_url: manifest_url, workflow_name: workflow_name) }

spec/services/format/training_data_csv_spec.rb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@
8686
end
8787
let(:expected_lines) do
8888
[
89-
'8000_231121_468,/test/2f2490b4-65c1-4dca-ba25-c44128aa7a39.jpeg,3,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0',
90-
'8000_231121_468,/test/fdccb1cf-0fc9-49b5-b054-62c83bccb9cd.jpeg,3,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0'
89+
'8000_231121_468,/test/2f2490b4-65c1-4dca-ba25-c44128aa7a39.jpeg,3,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0'
9190
].join("\n")
9291
end
9392

0 commit comments

Comments
 (0)