Skip to content

Commit 3369142

Browse files
Merge pull request #39 from NHSDigital/generate-image-data
Generate image data
2 parents 1ffedaa + c4a4082 commit 3369142

File tree

13 files changed

+510
-128
lines changed

13 files changed

+510
-128
lines changed

app/assets/sass/_misc.scss

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.app-mammogram-image--placeholder {
22
background-color: black;
3-
width: 150px;
4-
height:200px;
3+
width: 75px;
4+
height:100px;
55

66
p {
77
color: white;

app/data/test-scenarios.js

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
/**
44
* Test scenarios define specific participants and events that should always exist
55
* in the generated data. This ensures we have consistent test cases.
6-
*
6+
*
77
* Only specify what needs to be consistent - any unspecified fields will be randomly generated.
88
* This allows natural variation while maintaining key test conditions.
99
*/
@@ -20,12 +20,15 @@ module.exports = [
2020
ethnicBackground: null,
2121
},
2222
extraNeeds: ['Wheelchair user'],
23-
defaultRiskLevel: 'routine',
24-
},
25-
scheduling: {
26-
whenRelativeToToday: 0,
27-
status: 'event_scheduled',
28-
approximateTime: '10:30',
23+
config: {
24+
defaultRiskLevel: 'routine',
25+
repeatView: 'RMLO',
26+
scheduling: {
27+
whenRelativeToToday: 0,
28+
status: 'event_scheduled',
29+
approximateTime: '10:30',
30+
},
31+
},
2932
},
3033
},
3134
{
@@ -40,13 +43,15 @@ module.exports = [
4043
ethnicBackground: null,
4144
},
4245
extraNeeds: null,
43-
defaultRiskLevel: 'routine',
44-
},
45-
scheduling: {
46-
whenRelativeToToday: 0,
47-
status: 'event_checked_in',
48-
approximateTime: '11:30',
49-
// slotIndex: 20,
46+
config: {
47+
defaultRiskLevel: 'routine',
48+
scheduling: {
49+
whenRelativeToToday: 0,
50+
status: 'event_checked_in',
51+
approximateTime: '11:30',
52+
// slotIndex: 20,
53+
},
54+
},
5055
},
5156
},
5257
]

app/lib/generate-seed-data.js

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ const testScenarios = require('../data/test-scenarios')
2828
// Create an index of participants by risk level for efficient lookup
2929
const createParticipantIndices = (participants, clinicDate, events = []) => {
3030
// console.time('Creating participant indices')
31-
31+
3232
const riskLevelIndex = {}
3333
const screeningHistoryIndex = new Map()
3434

@@ -43,13 +43,13 @@ const createParticipantIndices = (participants, clinicDate, events = []) => {
4343
if (age >= ageRange.lower && age <= ageRange.upper) {
4444
// Initialize array for this risk level if needed
4545
riskLevelIndex[riskLevel] = riskLevelIndex[riskLevel] || []
46-
46+
4747
// Add participant to their risk level index
4848
riskLevelIndex[riskLevel].push(participant)
4949
}
5050

5151
// Track all screening events for this participant
52-
const participantEvents = events.filter(event =>
52+
const participantEvents = events.filter(event =>
5353
event.participantId === participant.id
5454
)
5555
screeningHistoryIndex.set(participant.id, participantEvents)
@@ -83,12 +83,12 @@ const findNearestSlot = (slots, targetTime) => {
8383
return eligibleSlots.reduce((nearest, slot) => {
8484
const slotTime = dayjs(slot.dateTime)
8585
const slotMinutes = slotTime.hour() * 60 + slotTime.minute()
86-
86+
8787
if (!nearest) return slot
8888

8989
const currentDiff = Math.abs(targetMinutes - slotMinutes)
9090
const nearestDiff = Math.abs(
91-
targetMinutes -
91+
targetMinutes -
9292
(dayjs(nearest.dateTime).hour() * 60 + dayjs(nearest.dateTime).minute())
9393
)
9494

@@ -105,9 +105,9 @@ const generateClinicsForDay = (date, allParticipants, unit, usedParticipantsInSn
105105
const isRecentSnapshot = dayjs(date).isAfter(dayjs().subtract(1, 'month'))
106106

107107
// Only look for test scenarios in recent snapshots
108-
const testScenariosForDay = isRecentSnapshot
108+
const testScenariosForDay = isRecentSnapshot
109109
? testScenarios.filter(scenario => {
110-
const targetDate = dayjs().startOf('day').add(scenario.scheduling.whenRelativeToToday, 'day')
110+
const targetDate = dayjs().startOf('day').add(scenario.participant.config.scheduling.whenRelativeToToday, 'day')
111111
return targetDate.isSame(dayjs(date).startOf('day'), 'day')
112112
})
113113
: []
@@ -124,14 +124,14 @@ const generateClinicsForDay = (date, allParticipants, unit, usedParticipantsInSn
124124
// For test scenarios, only use first clinic of the day
125125
if (testScenariosForDay.length > 0 && newClinics.length > 0) {
126126
const firstClinic = newClinics[0]
127-
127+
128128
testScenariosForDay.forEach(scenario => {
129129
const participant = participants.find(p => p.id === scenario.participant.id)
130130
if (!participant) return
131131

132-
const slot = scenario.scheduling.slotIndex !== undefined
133-
? firstClinic.slots[scenario.scheduling.slotIndex]
134-
: findNearestSlot(firstClinic.slots, scenario.scheduling.approximateTime)
132+
const slot = scenario.participant.config.scheduling.slotIndex !== undefined
133+
? firstClinic.slots[scenario.participant.config.scheduling.slotIndex]
134+
: findNearestSlot(firstClinic.slots, scenario.participant.config.scheduling.approximateTime)
135135

136136
if (!slot) {
137137
console.log(`Warning: Could not find suitable slot for test participant ${participant.id}`)
@@ -143,7 +143,7 @@ const generateClinicsForDay = (date, allParticipants, unit, usedParticipantsInSn
143143
participant,
144144
clinic: firstClinic,
145145
outcomeWeights: config.screening.outcomes[firstClinic.clinicType],
146-
forceStatus: scenario.scheduling.status,
146+
forceStatus: scenario.participant.config.scheduling.status,
147147
})
148148

149149
events.push(event)
@@ -164,7 +164,7 @@ const generateClinicsForDay = (date, allParticipants, unit, usedParticipantsInSn
164164
const selectedRiskLevel = weighted.select(
165165
Object.fromEntries(
166166
availableRiskLevels.map(level => [
167-
level,
167+
level,
168168
riskLevels[level].weight
169169
])
170170
)
@@ -175,7 +175,7 @@ const generateClinicsForDay = (date, allParticipants, unit, usedParticipantsInSn
175175
.filter(p => !usedParticipantsInSnapshot.has(p.id))
176176

177177
if (availableParticipants.length === 0) {
178-
178+
179179
const newParticipant = generateParticipant({
180180
ethnicities,
181181
breastScreeningUnits: [unit],
@@ -278,7 +278,7 @@ const generateData = async () => {
278278
console.log(`Generating data for ${unit.name}...`)
279279

280280
let unitEvents = [] // Track events for this unit across snapshots
281-
281+
282282
// Process each snapshot
283283
const unitData = snapshots.map(dates => {
284284
// Create a set to track used participants for this entire snapshot
@@ -291,7 +291,7 @@ const generateData = async () => {
291291
// console.log(`- ${indices.screeningHistoryIndex.size} participants with history`)
292292

293293
// Process each day in the snapshot
294-
const snapshotData = dates.map(date =>
294+
const snapshotData = dates.map(date =>
295295
generateClinicsForDay(date, participants, unit, usedParticipantsInSnapshot, indices)
296296
)
297297

app/lib/generators/event-generator.js

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const { faker } = require('@faker-js/faker')
55
const weighted = require('weighted')
66
const dayjs = require('dayjs')
77
const config = require('../../config')
8+
const { generateMammogramImages } = require('./mammogram-generator')
89

910
const NOT_SCREENED_REASONS = [
1011
'Recent mammogram at different facility',
@@ -114,9 +115,6 @@ const generateEvent = ({ slot, participant, clinic, outcomeWeights, forceStatus
114115
...eventBase,
115116
details: {
116117
...eventBase.details,
117-
imagesTaken: eventStatus === 'event_complete'
118-
? ['RCC', 'LCC', 'RMLO', 'LMLO']
119-
: null,
120118
notScreenedReason: eventStatus === 'event_attended_not_screened'
121119
? faker.helpers.arrayElement(NOT_SCREENED_REASONS)
122120
: null,
@@ -139,6 +137,13 @@ const generateEvent = ({ slot, participant, clinic, outcomeWeights, forceStatus
139137
actualEndTime: actualEndTime.toISOString(),
140138
actualDuration: actualEndTime.diff(actualStartTime, 'minute'),
141139
}
140+
141+
// Add mammogram images for completed events
142+
event.mammogramData = generateMammogramImages({
143+
startTime: actualStartTime,
144+
isSeedData: true,
145+
config: participant.config
146+
})
142147
}
143148

144149
return event
app/lib/generators/mammogram-generator.js

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
// app/lib/generators/mammogram-generator.js
2+
3+
const { faker } = require('@faker-js/faker')
4+
const generateId = require('../utils/id-generator')
5+
const dayjs = require('dayjs')
6+
const weighted = require('weighted')
7+
8+
// The four standard screening views, in the order they are generated.
// Frozen because the array is exported and shared between callers.
const STANDARD_VIEWS = Object.freeze([
  Object.freeze({ side: 'right', view: 'mediolateral oblique' }),
  Object.freeze({ side: 'right', view: 'craniocaudal' }),
  Object.freeze({ side: 'left', view: 'craniocaudal' }),
  Object.freeze({ side: 'left', view: 'mediolateral oblique' })
])

// Reasons that can be recorded against a view that had to be repeated
const REPEAT_REASONS = Object.freeze([
  'patient movement',
  'positioning issue',
  'exposure issue',
  'blurred image',
  'technical fault'
])

// Default probability settings
const DEFAULT_PROBABILITIES = Object.freeze({
  viewMissing: 0.05, // 5% chance of a view being missing
  needsRepeat: 0.15 // 15% chance of one view needing a repeat
})
28+
29+
const generateViewKey = (side, view) => {
30+
const prefix = side === 'right' ? 'right' : 'left'
31+
const viewName = view === 'mediolateral oblique' ? 'MediolateralOblique' : 'Craniocaudal'
32+
return `${prefix}${viewName}`
33+
}
34+
35+
const generateImageUrl = (side, view, accessionNumber) => {
36+
const sideCode = side === 'right' ? 'R' : 'L'
37+
const viewCode = view === 'mediolateral oblique' ? 'MLO' : 'CC'
38+
return `/images/mammograms/${sideCode}-${viewCode}-${accessionNumber.replace('/', '-')}.dcm`
39+
}
40+
41+
/**
42+
* Generate images for a single view
43+
* @param {Object} params - Parameters for image generation
44+
* @param {string} params.side - Breast side ('right' or 'left')
45+
* @param {string} params.view - View type ('mediolateral oblique' or 'craniocaudal')
46+
* @param {string} params.accessionBase - Base accession number
47+
* @param {number} params.startIndex - Starting index for image numbering
48+
* @param {string} params.startTime - Start timestamp
49+
* @param {boolean} params.isSeedData - Whether generating seed data
50+
* @param {boolean} [params.needsRepeat] - Force this view to be repeated
51+
* @returns {Object} View data with images
52+
*/
53+
const generateViewImages = ({ side, view, accessionBase, startIndex, startTime, isSeedData, needsRepeat = false }) => {
54+
let currentIndex = startIndex
55+
let currentTime = dayjs(startTime)
56+
const images = []
57+
58+
// Generate initial image
59+
images.push({
60+
timestamp: currentTime.toISOString(),
61+
accessionNumber: `${accessionBase}/${currentIndex}`,
62+
url: generateImageUrl(side, view, `${accessionBase}/${currentIndex}`)
63+
})
64+
65+
// Generate repeat if needed
66+
if (needsRepeat) {
67+
currentIndex++
68+
currentTime = currentTime.add(faker.number.int({ min: 25, max: 50 }), 'seconds')
69+
70+
images.push({
71+
timestamp: currentTime.toISOString(),
72+
accessionNumber: `${accessionBase}/${currentIndex}`,
73+
url: generateImageUrl(side, view, `${accessionBase}/${currentIndex}`)
74+
})
75+
}
76+
77+
return {
78+
side,
79+
view,
80+
viewShort: view === 'mediolateral oblique' ? 'MLO' : 'CC',
81+
viewShortWithSide: `${side === 'right' ? 'R' : 'L'}${view === 'mediolateral oblique' ? 'MLO' : 'CC'}`,
82+
images,
83+
isRepeat: needsRepeat && isSeedData,
84+
repeatReason: needsRepeat && isSeedData ? faker.helpers.arrayElement(REPEAT_REASONS) : null
85+
}
86+
}
87+
88+
/**
89+
* Generate a complete set of mammogram images
90+
* @param {Object} options - Generation options
91+
* @param {Date|string} [options.startTime] - Starting timestamp (defaults to now)
92+
* @param {boolean} [options.isSeedData=false] - Whether generating seed data
93+
* @param {Object} [options.config] - Optional configuration for specific scenarios
94+
* @param {string} [options.config.repeatView] - Force a specific view to be repeated (e.g. 'RMLO')
95+
* @param {string[]} [options.config.missingViews] - Array of views to omit (e.g. ['RMLO'])
96+
* @param {Object} [options.probabilities] - Override default probabilities
97+
* @returns {Object} Complete mammogram data
98+
*/
99+
const generateMammogramImages = ({
100+
startTime = new Date(),
101+
isSeedData = false,
102+
config = {},
103+
probabilities = DEFAULT_PROBABILITIES
104+
} = {}) => {
105+
const accessionBase = faker.number.int({ min: 100000000, max: 999999999 }).toString()
106+
let currentIndex = 1
107+
let currentTime = dayjs(startTime)
108+
const views = {}
109+
110+
// Determine which view gets repeated (if any)
111+
let viewToRepeat = null
112+
if (config.repeatView) {
113+
viewToRepeat = config.repeatView
114+
} else if (Math.random() < probabilities.needsRepeat) {
115+
viewToRepeat = faker.helpers.arrayElement(['RMLO', 'RCC', 'LCC', 'LMLO'])
116+
}
117+
118+
// Generate each standard view
119+
STANDARD_VIEWS.forEach(({ side, view }) => {
120+
const viewKey = generateViewKey(side, view)
121+
const viewShortWithSide = `${side === 'right' ? 'R' : 'L'}${view === 'mediolateral oblique' ? 'MLO' : 'CC'}`
122+
123+
// Skip if this view is in missingViews config
124+
if (config.missingViews?.includes(viewShortWithSide) ||
125+
(!config.missingViews && Math.random() < probabilities.viewMissing)) {
126+
return
127+
}
128+
129+
const viewData = generateViewImages({
130+
side,
131+
view,
132+
accessionBase,
133+
startIndex: currentIndex,
134+
startTime: currentTime.toISOString(),
135+
isSeedData,
136+
needsRepeat: viewToRepeat === viewShortWithSide
137+
})
138+
139+
views[viewKey] = viewData
140+
141+
// Update counters for next view
142+
currentIndex += viewData.images.length
143+
currentTime = currentTime.add(faker.number.int({ min: 45, max: 70 }), 'seconds')
144+
})
145+
146+
// Calculate metadata
147+
const totalImages = Object.values(views).reduce((sum, view) => sum + view.images.length, 0)
148+
const allTimestamps = Object.values(views)
149+
.flatMap(view => view.images.map(img => img.timestamp))
150+
.sort()
151+
152+
return {
153+
accessionBase,
154+
views,
155+
metadata: {
156+
totalImages,
157+
standardViewsCompleted: Object.keys(views).length === 4,
158+
startTime: allTimestamps[0],
159+
endTime: allTimestamps[allTimestamps.length - 1]
160+
}
161+
}
162+
}
163+
164+
module.exports = {
165+
generateMammogramImages,
166+
STANDARD_VIEWS,
167+
REPEAT_REASONS
168+
}

0 commit comments

Comments
 (0)