Skip to content

Commit bd12530

Browse files
author
David Erb
committed
uses plate object to translate luigi filenames to positions
1 parent f6df723 commit bd12530

File tree

2 files changed

+49
-29
lines changed

2 files changed

+49
-29
lines changed

src/rockingester_lib/collectors/direct_poll.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
from dls_utilpack.visit import VisitNotFound, get_xchem_directory
1212
from PIL import Image
1313

14+
# Crystal plate object interface.
15+
from xchembku_api.crystal_plate_objects.interface import (
16+
Interface as CrystalPlateInterface,
17+
)
18+
1419
# Dataface client context.
1520
from xchembku_api.datafaces.context import Context as XchembkuDatafaceClientContext
1621
from xchembku_api.models.crystal_plate_filter_model import CrystalPlateFilterModel
@@ -290,7 +295,7 @@ async def scrape_plate_directory_when_complete(
290295
)
291296

292297
# Get all the well images in the plate directory.
293-
well_names = [
298+
subwell_names = [
294299
entry.name for entry in os.scandir(plate_directory) if entry.is_file()
295300
]
296301

@@ -301,19 +306,20 @@ async def scrape_plate_directory_when_complete(
301306

302307
# Don't handle the plate directory until all images have arrived.
303308
# TODO: Put in some kind of failsafe in direct_poll.py to handle case where all the well images never arrive.
304-
if len(well_names) < crystal_plate_object.get_well_count():
309+
if len(subwell_names) < crystal_plate_object.get_well_count():
305310
return
306311

307312
# Sort wells by name so that tests are deterministic.
308-
well_names.sort()
313+
subwell_names.sort()
309314

310315
crystal_well_models: List[CrystalWellModel] = []
311-
for well_name in well_names:
316+
for subwell_name in subwell_names:
312317
# Make the well model, including image width/height.
313318
crystal_well_model = await self.ingest_well(
314319
plate_directory,
315-
well_name,
320+
subwell_name,
316321
crystal_plate_model,
322+
crystal_plate_object,
317323
target,
318324
)
319325

@@ -331,7 +337,7 @@ async def scrape_plate_directory_when_complete(
331337
)
332338

333339
logger.info(
334-
f"copied {len(well_names)} well images from plate {plate_directory.name} to {target}"
340+
f"copied {len(subwell_names)} well images from plate {plate_directory.name} to {target}"
335341
)
336342

337343
# Remember we "handled" this one.
@@ -341,8 +347,9 @@ async def scrape_plate_directory_when_complete(
341347
async def ingest_well(
342348
self,
343349
plate_directory: Path,
344-
well_name: str,
350+
subwell_name: str,
345351
crystal_plate_model: CrystalPlateModel,
352+
crystal_plate_object: CrystalPlateInterface,
346353
target: Path,
347354
) -> CrystalWellModel:
348355
"""
@@ -351,17 +358,12 @@ async def ingest_well(
351358
Move the well image file to the ingested area.
352359
"""
353360

354-
input_well_filename = plate_directory / well_name
355-
ingested_well_filename = target / well_name
361+
input_well_filename = plate_directory / subwell_name
362+
ingested_well_filename = target / subwell_name
356363

357364
# Stems are like "9acx_01A_1".
358-
# TODO: Improve safety by ignoring wrongly formatted and non-jpg well filenames.
359-
parts = Path(well_name).stem.split("_")
360-
if len(parts) > 1:
361-
# Strip off the leading 4-letter barcode and underscore.
362-
position = "".join(parts[1:])
363-
else:
364-
position = parts[0]
365+
# Convert the stem into a position as shown in soakdb3.
366+
position = crystal_plate_object.normalize_subwell_name(subwell_name)
365367

366368
error = None
367369
try:

tests/test_collector.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,12 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
141141
# Make the plate on which the wells reside.
142142
visit = "cm00001-1_otherstuff"
143143
created_crystal_plate_models = []
144+
145+
scrabable_barcode = "98ab"
144146
created_crystal_plate_models.append(
145147
CrystalPlateModel(
146148
formulatrix__plate__id=10,
147-
barcode="98ab",
149+
barcode=scrabable_barcode,
148150
visit=visit,
149151
thing_type=CrystalPlateObjectThingTypes.SWISS3,
150152
)
@@ -183,8 +185,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
183185
# This one gets scraped as normal.
184186
plate_directory1 = plates_directory / "98ab_2023-04-06_RI1000-0276-3drop"
185187
plate_directory1.mkdir(parents=True)
186-
for i in range(10, 10 + scrapable_image_count):
187-
filename = plate_directory1 / ("98ab_%03dA_1.jpg" % (i))
188+
for i in range(scrapable_image_count):
189+
filename = plate_directory1 / self.__subwell_filename(scrabable_barcode, i)
188190
with open(filename, "w") as stream:
189191
stream.write("")
190192

@@ -195,8 +197,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
195197
)
196198
plate_directory2.mkdir(parents=True)
197199
nobarcode_image_count = 3
198-
for i in range(10, 10 + nobarcode_image_count):
199-
filename = plate_directory2 / ("%s_%03dA_1.jpg" % (nobarcode_barcode, i))
200+
for i in range(nobarcode_image_count):
201+
filename = plate_directory2 / self.__subwell_filename(nobarcode_barcode, i)
200202
with open(filename, "w") as stream:
201203
stream.write("")
202204

@@ -207,8 +209,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
207209
)
208210
plate_directory3.mkdir(parents=True)
209211
novisit_image_count = 6
210-
for i in range(10, 10 + novisit_image_count):
211-
filename = plate_directory3 / ("%s_%03dA_1.jpg" % (novisit_barcode, i))
212+
for i in range(novisit_image_count):
213+
filename = plate_directory3 / self.__subwell_filename(novisit_barcode, i)
212214
with open(filename, "w") as stream:
213215
stream.write("")
214216

@@ -219,8 +221,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
219221
)
220222
plate_directory4.mkdir(parents=True)
221223
excluded_image_count = 2
222-
for i in range(10, 10 + excluded_image_count):
223-
filename = plate_directory4 / ("%s_%03dA_1.jpg" % (excluded_barcode, i))
224+
for i in range(excluded_image_count):
225+
filename = plate_directory4 / self.__subwell_filename(excluded_barcode, i)
224226
with open(filename, "w") as stream:
225227
stream.write("")
226228

@@ -259,10 +261,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
259261
), "images after scraping"
260262

261263
# Make sure the positions got recorded right in the wells.
262-
i = 10
263-
for crystal_well_model in crystal_well_models:
264-
assert crystal_well_model.position == "%03dA1" % (i)
265-
i += 1
264+
assert crystal_well_models[0].position == "A01a"
265+
assert crystal_well_models[-1].position == "H12d"
266266

267267
# The first "scrapable" plate directory should still exist.
268268
count = sum(1 for _ in plate_directory1.glob("*") if _.is_file())
@@ -305,3 +305,21 @@ async def __run_part2(self, scrapable_image_count, constants, output_directory):
305305
records = await xchembku.fetch_crystal_wells_filenames()
306306

307307
assert len(records) == scrapable_image_count, "images after restarting scraper"
308+
309+
# ----------------------------------------------------------------------------------------
310+
311+
def __subwell_filename(self, barcode, index):
312+
"""
313+
Make a subwell image name which can be parsed by swiss3.
314+
"""
315+
316+
well_letters = "ABCDEFGH"
317+
318+
well = int(index / 3)
319+
subwell = index % 3 + 1
320+
row = well_letters[int(well / 12)]
321+
col = "%02d" % (well % 12 + 1)
322+
323+
subwell_filename = f"{barcode}_{col}{row}_{subwell}"
324+
325+
return subwell_filename

0 commit comments

Comments
 (0)