diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index c90c46f..45b33ab 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -38,7 +38,7 @@ jobs:
run_tests:
strategy:
matrix:
- python_version: ["3.10", "3.11", "3.12", "3.13"]
+ python_version: ["3.11", "3.12", "3.13"]
os: [ubuntu-24.04, macos-14]
runs-on: ${{ matrix.os }}
env:
diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb
index e55905d..3492abb 100644
--- a/docs/src/examples/cytodataframe_at_a_glance.ipynb
+++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb
@@ -49,14 +49,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 967 ms, sys: 678 ms, total: 1.64 s\n",
- "Wall time: 630 ms\n"
+ "CPU times: user 877 ms, sys: 542 ms, total: 1.42 s\n",
+ "Wall time: 585 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "509bceaad6b04a68b30b58dc6e723961",
+ "model_id": "90a70b1fe81949c2bfd4c58cc1f6f39d",
"version_major": 2,
"version_minor": 0
},
@@ -175,14 +175,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 989 ms, sys: 704 ms, total: 1.69 s\n",
- "Wall time: 531 ms\n"
+ "CPU times: user 845 ms, sys: 567 ms, total: 1.41 s\n",
+ "Wall time: 482 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "b4c2a53d76214ee5b4947a1da047c526",
+ "model_id": "cf8e8baaf3e64bd78263db0734eadcbf",
"version_major": 2,
"version_minor": 0
},
@@ -274,7 +274,7 @@
"source": [
"%%time\n",
"# view JUMP plate BR00117006 with images and overlaid outlines for segmentation\n",
- "CytoDataFrame(\n",
+ "frame = CytoDataFrame(\n",
" data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n",
" data_context_dir=f\"{jump_data_path}/images/orig\",\n",
" data_outline_context_dir=f\"{jump_data_path}/images/outlines\",\n",
@@ -286,7 +286,8 @@
" \"Image_FileName_OrigDNA\",\n",
" \"Image_FileName_OrigRNA\",\n",
" ]\n",
- "][:3]"
+ "][:3]\n",
+ "frame"
]
},
{
@@ -299,14 +300,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 909 ms, sys: 683 ms, total: 1.59 s\n",
- "Wall time: 478 ms\n"
+ "CPU times: user 844 ms, sys: 530 ms, total: 1.37 s\n",
+ "Wall time: 485 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f53053d547ee4ef7bd73cc1bb5b56eb6",
+ "model_id": "3931051a74524ba481b50911b96a8921",
"version_major": 2,
"version_minor": 0
},
@@ -425,14 +426,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 998 ms, sys: 678 ms, total: 1.68 s\n",
- "Wall time: 535 ms\n"
+ "CPU times: user 850 ms, sys: 532 ms, total: 1.38 s\n",
+ "Wall time: 507 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "c9872a2c9e8140688d9d774033e84150",
+ "model_id": "0370cc7dd475438da444fee2c48463d7",
"version_major": 2,
"version_minor": 0
},
@@ -560,14 +561,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 860 ms, sys: 544 ms, total: 1.4 s\n",
- "Wall time: 515 ms\n"
+ "CPU times: user 873 ms, sys: 573 ms, total: 1.45 s\n",
+ "Wall time: 492 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "97cbe4df04fb4063897033139372d528",
+ "model_id": "9548cc409e5147a99234750f155de932",
"version_major": 2,
"version_minor": 0
},
@@ -684,14 +685,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 913 ms, sys: 619 ms, total: 1.53 s\n",
- "Wall time: 492 ms\n"
+ "CPU times: user 829 ms, sys: 527 ms, total: 1.36 s\n",
+ "Wall time: 485 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "acdf7ff4a7f442f38aa496e888d6a941",
+ "model_id": "743365d93b084a99ad90d8af6c3a65aa",
"version_major": 2,
"version_minor": 0
},
@@ -810,14 +811,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 957 ms, sys: 651 ms, total: 1.61 s\n",
- "Wall time: 505 ms\n"
+ "CPU times: user 868 ms, sys: 536 ms, total: 1.4 s\n",
+ "Wall time: 507 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "5ae6de8a7e064553aa571d8cae0a51e1",
+ "model_id": "97a07fa56cfb4067a0d072b999b9d706",
"version_major": 2,
"version_minor": 0
},
@@ -935,14 +936,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 906 ms, sys: 637 ms, total: 1.54 s\n",
- "Wall time: 488 ms\n"
+ "CPU times: user 826 ms, sys: 480 ms, total: 1.31 s\n",
+ "Wall time: 498 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "66e2c484e61343678e625b2e918eb417",
+ "model_id": "d3e367dd536a44e198c099b47dc289f9",
"version_major": 2,
"version_minor": 0
},
@@ -1070,6 +1071,158 @@
{
"cell_type": "code",
"execution_count": 10,
+ "id": "0febd3ed-b460-4cb3-ba5b-f3de3a33a29a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 2.09 s, sys: 473 ms, total: 2.56 s\n",
+ "Wall time: 5.45 s\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "cab6bdff53ff441894866b56c3a6d179",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Metadata_ImageNumber | \n",
+ " Cells_Number_Object_Number | \n",
+ " Image_FileName_OrigAGP | \n",
+ " Image_FileName_OrigDNA | \n",
+ " Image_FileName_OrigRNA | \n",
+ " Image_FileName_OrigAGP_OMEArrow_ORIG | \n",
+ " Image_FileName_OrigAGP_OMEArrow_LABL | \n",
+ " Image_FileName_OrigAGP_OMEArrow_COMP | \n",
+ " Image_FileName_OrigDNA_OMEArrow_ORIG | \n",
+ " Image_FileName_OrigDNA_OMEArrow_LABL | \n",
+ " Image_FileName_OrigDNA_OMEArrow_COMP | \n",
+ " Image_FileName_OrigRNA_OMEArrow_ORIG | \n",
+ " Image_FileName_OrigRNA_OMEArrow_LABL | \n",
+ " Image_FileName_OrigRNA_OMEArrow_COMP | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " r01c01f01p01-ch2sk1fk1fl1.tiff | \n",
+ " r01c01f01p01-ch5sk1fk1fl1.tiff | \n",
+ " r01c01f01p01-ch3sk1fk1fl1.tiff | \n",
+ "  | \n",
+ " None | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " r01c01f01p01-ch2sk1fk1fl1.tiff | \n",
+ " r01c01f01p01-ch5sk1fk1fl1.tiff | \n",
+ " r01c01f01p01-ch3sk1fk1fl1.tiff | \n",
+ "  | \n",
+ " None | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " r01c01f01p01-ch2sk1fk1fl1.tiff | \n",
+ " r01c01f01p01-ch5sk1fk1fl1.tiff | \n",
+ " r01c01f01p01-ch3sk1fk1fl1.tiff | \n",
+ "  | \n",
+ " None | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "# export to OME Parquet, a format which uses OME Arrow\n",
+ "# to store OME-spec images as values within the table.\n",
+ "frame.to_ome_parquet(file_path=\"example.ome.parquet\")\n",
+ "\n",
+ "# read OME Parquet file into the CytoDataFrame\n",
+ "CytoDataFrame(data=\"example.ome.parquet\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
"id": "0f845d33-b3b1-4ac7-9e09-ede8042d9306",
"metadata": {},
"outputs": [
@@ -1077,14 +1230,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 949 ms, sys: 729 ms, total: 1.68 s\n",
- "Wall time: 485 ms\n"
+ "CPU times: user 881 ms, sys: 550 ms, total: 1.43 s\n",
+ "Wall time: 514 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "8d4aedd7a4fc434aa8bc576dad82c80f",
+ "model_id": "c19a8f9c323842a4a3af799a2b2f7f90",
"version_major": 2,
"version_minor": 0
},
@@ -1184,7 +1337,7 @@
"data": {
"text/plain": []
},
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -1218,7 +1371,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"id": "365f68d0-3b66-4fb8-a381-8712dc43188c",
"metadata": {},
"outputs": [
@@ -1226,14 +1379,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 307 ms, sys: 217 ms, total: 524 ms\n",
- "Wall time: 177 ms\n"
+ "CPU times: user 244 ms, sys: 162 ms, total: 406 ms\n",
+ "Wall time: 148 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "a58b7b8cf8f74940b770af2ec3110be1",
+ "model_id": "c7b58a7f6e8c48868cdfa39cf6263e69",
"version_major": 2,
"version_minor": 0
},
@@ -1317,7 +1470,7 @@
"data": {
"text/plain": []
},
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1341,7 +1494,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"id": "de89c992-ea92-4565-b03b-3b27ae46d28c",
"metadata": {},
"outputs": [
@@ -1349,14 +1502,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 286 ms, sys: 211 ms, total: 497 ms\n",
- "Wall time: 155 ms\n"
+ "CPU times: user 326 ms, sys: 182 ms, total: 508 ms\n",
+ "Wall time: 239 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "9474fab165714ceb979a08b766f603a4",
+ "model_id": "12489f60233f4875922d660eca4fbc77",
"version_major": 2,
"version_minor": 0
},
@@ -1440,7 +1593,7 @@
"data": {
"text/plain": []
},
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -1448,7 +1601,7 @@
"source": [
"%%time\n",
"# view NF1 Cell Painting data with images and overlaid outlines from masks\n",
- "CytoDataFrame(\n",
+ "frame = CytoDataFrame(\n",
" data=f\"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet\",\n",
" data_context_dir=f\"{nf1_cellpainting_path}/Plate_2_images\",\n",
" data_mask_context_dir=f\"{nf1_cellpainting_path}/Plate_2_masks\",\n",
@@ -1460,12 +1613,13 @@
" \"Image_FileName_RFP\",\n",
" \"Image_FileName_DAPI\",\n",
" ]\n",
- "][:3]"
+ "][:3]\n",
+ "frame"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"id": "49fc2952-eb69-4ed9-bd91-c29843ddbebd",
"metadata": {},
"outputs": [
@@ -1473,14 +1627,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 313 ms, sys: 216 ms, total: 529 ms\n",
- "Wall time: 176 ms\n"
+ "CPU times: user 261 ms, sys: 177 ms, total: 437 ms\n",
+ "Wall time: 149 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "8f436180c329426e8524a7463eed328d",
+ "model_id": "a6b51158acd4499395d2ce1f83c716b7",
"version_major": 2,
"version_minor": 0
},
@@ -1564,7 +1718,7 @@
"data": {
"text/plain": []
},
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1600,7 +1754,159 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
+ "id": "e21df647-9887-49b3-8328-4f861807abce",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 958 ms, sys: 182 ms, total: 1.14 s\n",
+ "Wall time: 1.15 s\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "419763b9c98544dfb6a340470f34bf2d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Metadata_ImageNumber | \n",
+ " Metadata_Cells_Number_Object_Number | \n",
+ " Image_FileName_GFP | \n",
+ " Image_FileName_RFP | \n",
+ " Image_FileName_DAPI | \n",
+ " Image_FileName_GFP_OMEArrow_ORIG | \n",
+ " Image_FileName_GFP_OMEArrow_LABL | \n",
+ " Image_FileName_GFP_OMEArrow_COMP | \n",
+ " Image_FileName_RFP_OMEArrow_ORIG | \n",
+ " Image_FileName_RFP_OMEArrow_LABL | \n",
+ " Image_FileName_RFP_OMEArrow_COMP | \n",
+ " Image_FileName_DAPI_OMEArrow_ORIG | \n",
+ " Image_FileName_DAPI_OMEArrow_LABL | \n",
+ " Image_FileName_DAPI_OMEArrow_COMP | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 353 | \n",
+ " 31 | \n",
+ " 4 | \n",
+ " B7_01_2_3_GFP_001.tif | \n",
+ " B7_01_3_3_RFP_001.tif | \n",
+ " B7_01_1_3_DAPI_001.tif | \n",
+ "  | \n",
+ " None | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 1564 | \n",
+ " 113 | \n",
+ " 17 | \n",
+ " H12_01_2_1_GFP_001.tif | \n",
+ " H12_01_3_1_RFP_001.tif | \n",
+ " H12_01_1_1_DAPI_001.tif | \n",
+ "  | \n",
+ " None | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 1275 | \n",
+ " 94 | \n",
+ " 5 | \n",
+ " F7_01_2_2_GFP_001.tif | \n",
+ " F7_01_3_2_RFP_001.tif | \n",
+ " F7_01_1_2_DAPI_001.tif | \n",
+ "  | \n",
+ " None | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "# export to OME Parquet, a format which uses OME Arrow\n",
+ "# to store OME-spec images as values within the table.\n",
+ "frame.to_ome_parquet(file_path=\"example.ome.parquet\")\n",
+ "\n",
+ "# read OME Parquet file into the CytoDataFrame\n",
+ "CytoDataFrame(data=\"example.ome.parquet\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
"id": "5a5304db-7dac-4f45-aa55-dd3f50299c60",
"metadata": {},
"outputs": [
@@ -1608,14 +1914,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 125 ms, sys: 91.5 ms, total: 216 ms\n",
- "Wall time: 73.4 ms\n"
+ "CPU times: user 92.2 ms, sys: 37.9 ms, total: 130 ms\n",
+ "Wall time: 66.1 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "ae82ec39b1ab4517913a6aaaf04aa1cb",
+ "model_id": "0e19834874994411ab3dbbb3dff790d5",
"version_major": 2,
"version_minor": 0
},
@@ -1699,7 +2005,7 @@
"data": {
"text/plain": []
},
- "execution_count": 14,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1724,7 +2030,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 17,
"id": "4c9af999-c9a2-4408-aa16-9437d08013ae",
"metadata": {},
"outputs": [
@@ -1732,14 +2038,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 369 ms, sys: 202 ms, total: 570 ms\n",
- "Wall time: 226 ms\n"
+ "CPU times: user 336 ms, sys: 233 ms, total: 570 ms\n",
+ "Wall time: 185 ms\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "cd820a3a018e4ef19e9c401b0f33f1df",
+ "model_id": "9a2d765c8ca043d1964aa51c32a0b19f",
"version_major": 2,
"version_minor": 0
},
@@ -1833,7 +2139,7 @@
"data": {
"text/plain": []
},
- "execution_count": 15,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -1862,7 +2168,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 18,
"id": "e8ebb16d-ee5f-4a34-b599-aef245b57705",
"metadata": {},
"outputs": [
@@ -1870,10 +2176,24 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 1 μs, sys: 0 ns, total: 1 μs\n",
- "Wall time: 1.91 μs\n"
+ "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n",
+ "Wall time: 3.1 μs\n"
]
},
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "646b26ef8b5644fc8094789430209d0d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
{
"data": {
"text/html": [
@@ -1957,7 +2277,7 @@
"data": {
"text/plain": []
},
- "execution_count": 16,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1968,6 +2288,168 @@
"# by quick variable reference.\n",
"cdf"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "0892633a-fdd2-448a-a96a-54dad4b5caf8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 895 ms, sys: 236 ms, total: 1.13 s\n",
+ "Wall time: 1.05 s\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0402208f8d74443b845c657b7fd58954",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Metadata_ImageNumber | \n",
+ " Metadata_Nuclei_Number_Object_Number | \n",
+ " Image_FileName_OrigAGP | \n",
+ " Image_FileName_OrigDNA | \n",
+ " Image_FileName_OrigAGP_OMEArrow_ORIG | \n",
+ " Image_FileName_OrigAGP_OMEArrow_LABL | \n",
+ " Image_FileName_OrigAGP_OMEArrow_COMP | \n",
+ " Image_FileName_OrigDNA_OMEArrow_ORIG | \n",
+ " Image_FileName_OrigDNA_OMEArrow_LABL | \n",
+ " Image_FileName_OrigDNA_OMEArrow_COMP | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " r03c03f03p01-ch3sk1fk1fl1.tiff | \n",
+ " r03c03f03p01-ch5sk1fk1fl1.tiff | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " r03c03f03p01-ch3sk1fk1fl1.tiff | \n",
+ " r03c03f03p01-ch5sk1fk1fl1.tiff | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " r03c03f03p01-ch3sk1fk1fl1.tiff | \n",
+ " r03c03f03p01-ch5sk1fk1fl1.tiff | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " r03c03f03p01-ch3sk1fk1fl1.tiff | \n",
+ " r03c03f03p01-ch5sk1fk1fl1.tiff | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " r03c03f03p01-ch3sk1fk1fl1.tiff | \n",
+ " r03c03f03p01-ch5sk1fk1fl1.tiff | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "  | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "# export to OME Parquet, a format which uses OME Arrow\n",
+ "# to store OME-spec images as values within the table.\n",
+ "cdf.to_ome_parquet(file_path=\"example.ome.parquet\")\n",
+ "\n",
+ "# read OME Parquet file into the CytoDataFrame\n",
+ "CytoDataFrame(data=\"example.ome.parquet\")"
+ ]
}
],
"metadata": {
diff --git a/docs/src/examples/cytodataframe_at_a_glance.py b/docs/src/examples/cytodataframe_at_a_glance.py
index 09bdcd3..73f4e2d 100644
--- a/docs/src/examples/cytodataframe_at_a_glance.py
+++ b/docs/src/examples/cytodataframe_at_a_glance.py
@@ -54,7 +54,7 @@
# %%time
# view JUMP plate BR00117006 with images and overlaid outlines for segmentation
-CytoDataFrame(
+frame = CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
@@ -67,6 +67,7 @@
"Image_FileName_OrigRNA",
]
][:3]
+frame
# %%time
@@ -183,6 +184,16 @@
]
][:5].T
+# +
+# %%time
+# export to OME Parquet, a format which uses OME Arrow
+# to store OME-spec images as values within the table.
+frame.to_ome_parquet(file_path="example.ome.parquet")
+
+# read OME Parquet file into the CytoDataFrame
+CytoDataFrame(data="example.ome.parquet")
+# -
+
# %%time
# view JUMP plate BR00117006 with images, changing the bounding box
# using offsets so each image has roughly the same size.
@@ -225,7 +236,7 @@
# %%time
# view NF1 Cell Painting data with images and overlaid outlines from masks
-CytoDataFrame(
+frame = CytoDataFrame(
data=f"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet",
data_context_dir=f"{nf1_cellpainting_path}/Plate_2_images",
data_mask_context_dir=f"{nf1_cellpainting_path}/Plate_2_masks",
@@ -238,6 +249,7 @@
"Image_FileName_DAPI",
]
][:3]
+frame
# +
# %%time
@@ -266,6 +278,15 @@
"Image_FileName_DAPI",
]
][:3]
+
+# +
+# %%time
+# export to OME Parquet, a format which uses OME Arrow
+# to store OME-spec images as values within the table.
+frame.to_ome_parquet(file_path="example.ome.parquet")
+
+# read OME Parquet file into the CytoDataFrame
+CytoDataFrame(data="example.ome.parquet")
# -
# %%time
@@ -308,3 +329,12 @@
# show that we can use the cytodataframe again
# by quick variable reference.
cdf
+
+# +
+# %%time
+# export to OME Parquet, a format which uses OME Arrow
+# to store OME-spec images as values within the table.
+cdf.to_ome_parquet(file_path="example.ome.parquet")
+
+# read OME Parquet file into the CytoDataFrame
+CytoDataFrame(data="example.ome.parquet")
diff --git a/media/coverage-badge.svg b/media/coverage-badge.svg
index f81ab84..e21693b 100644
--- a/media/coverage-badge.svg
+++ b/media/coverage-badge.svg
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index 5bfe2b9..a460e60 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
[[package]]
name = "accessible-pygments"
@@ -44,10 +44,8 @@ files = [
]
[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
@@ -177,9 +175,6 @@ files = [
{file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"},
]
-[package.dependencies]
-typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
-
[[package]]
name = "attrs"
version = "23.2.0"
@@ -280,8 +275,6 @@ packaging = ">=22.0"
pathspec = ">=0.9.0"
platformdirs = ">=2"
pytokens = ">=0.3.0"
-tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
-typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
[package.extras]
colorama = ["colorama (>=0.4.3)"]
@@ -634,23 +627,24 @@ test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist"
[[package]]
name = "cosmicqc"
-version = "0.1.5"
+version = "1.0.1"
description = "coSMicQC is a software tool for single-cell quality control of morphology datasets."
optional = false
-python-versions = "<3.13,>=3.10"
+python-versions = "<3.14,>=3.10"
groups = ["dev"]
markers = "python_version < \"3.13\""
files = [
- {file = "cosmicqc-0.1.5-py3-none-any.whl", hash = "sha256:18345e226ce02076214df6256b0fe0013d8578722a4271b884d991c8d518ae9e"},
- {file = "cosmicqc-0.1.5.tar.gz", hash = "sha256:2d29bf0955fbcefd54bb2edb02979e1bc4390443d1ce24c854eab5111c769608"},
+ {file = "cosmicqc-1.0.1-py3-none-any.whl", hash = "sha256:b4be60de43c40774aa0d42a3749478d4dff6f08e8b57a2764835cb26a9efe06e"},
+ {file = "cosmicqc-1.0.1.tar.gz", hash = "sha256:3bee8896f0084de9e12e02aa55fe1ef51076d5a8b0dba6f00ec8225340ae8918"},
]
[package.dependencies]
-cytodataframe = ">=0.0.11"
+cytodataframe = {version = ">=0.0.25", markers = "python_version >= \"3.9\""}
fire = ">=0.6,<0.8"
+llvmlite = "0.45.1"
matplotlib = ">=3.10.3,<4.0.0"
pandas = {version = ">=2.2.2,<3.0.0", markers = "python_version >= \"3.9\""}
-pyarrow = ">=16,<21"
+pyarrow = ">=16"
pyyaml = ">=6.0.1,<7.0.0"
scipy = {version = ">=1.13.0,<2.0.0", markers = "python_version >= \"3.9\""}
seaborn = ">=0.13.2,<0.14.0"
@@ -909,22 +903,6 @@ files = [
[package.dependencies]
packaging = ">=20.9"
-[[package]]
-name = "exceptiongroup"
-version = "1.2.1"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-groups = ["main", "dev", "docs"]
-markers = "python_version == \"3.10\""
-files = [
- {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
- {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
[[package]]
name = "executing"
version = "2.0.1"
@@ -1211,18 +1189,21 @@ files = [
{file = "imagecodecs-2025.3.30-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:44dc270d78b7cda29e2d430acbd8dab66322766412e596f450871e2831148aa2"},
{file = "imagecodecs-2025.3.30-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cee56331d9a700e9ec518caeba6d9813ffd7c042f1fae47d2dafcdfc259d2a5"},
{file = "imagecodecs-2025.3.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e354fa2046bb7029d0a1ff15a8bb31487ca0d479cd42fdb5c312bcd9408ce3fc"},
+ {file = "imagecodecs-2025.3.30-cp311-cp311-win32.whl", hash = "sha256:4ce5c1eb14716bfa733516a69f3b8b77f05cf0541558cc4e8f8991e57d40cc82"},
{file = "imagecodecs-2025.3.30-cp311-cp311-win_amd64.whl", hash = "sha256:7debc7231780d8e44ffcd13aee2178644d93115c19ff73c96cf3068b219ac3a2"},
{file = "imagecodecs-2025.3.30-cp311-cp311-win_arm64.whl", hash = "sha256:2b5c1c02c70da9561da9b728b97599b3ed0ef7d5399979017ce90029f522587b"},
{file = "imagecodecs-2025.3.30-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:dad3f0fc39eb9a88cecb2ccfe0e13eac35b21da36c0171285e4b289b12085235"},
{file = "imagecodecs-2025.3.30-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2806b6e605e674d7e3d21099779a88cb30b9da4807a88e0f02da3ea249085e5f"},
{file = "imagecodecs-2025.3.30-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abfb2231f4741262c91f3e77af85ce1f35b7d44f71414c5d1ba6008cfc3e5672"},
{file = "imagecodecs-2025.3.30-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6583fdcac9a4cd75a7701ed7fac7e74d3836807eb9f8aee22f60f519b748ff56"},
+ {file = "imagecodecs-2025.3.30-cp312-cp312-win32.whl", hash = "sha256:ed187770804cbf322b60e24dfc14b8a1e2c321a1b93afb3a7e4948fbb9e99bf0"},
{file = "imagecodecs-2025.3.30-cp312-cp312-win_amd64.whl", hash = "sha256:0b0f6e0f118674c76982e5a25bfeec5e6fc4fc4fc102c0d356e370f473e7b512"},
{file = "imagecodecs-2025.3.30-cp312-cp312-win_arm64.whl", hash = "sha256:bde3bd80cdf65afddb64af4c433549e882a5aa15d300e3781acab8d4df1c94a9"},
{file = "imagecodecs-2025.3.30-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:0bf7248a7949525848f3e2c7d09e837e8333d52c7ac0436c6eed36235da8227b"},
{file = "imagecodecs-2025.3.30-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3e598b6ec77df2517a8d4af6b66393250ba4a8764fccda5dbe6546236df5d11c"},
{file = "imagecodecs-2025.3.30-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:212ae6ba7c656ddf24e8aabefc56c5e2300335ed1305838508c57de202e6dbe4"},
{file = "imagecodecs-2025.3.30-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfa7b1c7d7af449c8153a040f7782d4296350245f8809e49dd4fb5bef4d740e6"},
+ {file = "imagecodecs-2025.3.30-cp313-cp313-win32.whl", hash = "sha256:66b614488d85d91f456b949fde4ad678dbe95cde38861043122237de086308c1"},
{file = "imagecodecs-2025.3.30-cp313-cp313-win_amd64.whl", hash = "sha256:1c51fef75fec66b4ea5e98b4ab47889942049389278749e1f96329c38f31c377"},
{file = "imagecodecs-2025.3.30-cp313-cp313-win_arm64.whl", hash = "sha256:eda70c0b9d2bcf225f7ae12dbefd0e3ab92ea7db30cdb56b292517fb61357ad7"},
{file = "imagecodecs-2025.3.30.tar.gz", hash = "sha256:29256f44a7fcfb8f235a3e9b3bae72b06ea2112e63bcc892267a8c01b7097f90"},
@@ -1362,7 +1343,6 @@ files = [
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
decorator = "*"
-exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
jedi = ">=0.16"
matplotlib-inline = "*"
pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""}
@@ -1740,7 +1720,6 @@ jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2"
packaging = "*"
setuptools = ">=41.1.0"
-tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
tornado = ">=6.2.0"
traitlets = "*"
@@ -1839,7 +1818,6 @@ mdit-py-plugins = "*"
nbformat = "*"
packaging = "*"
pyyaml = "*"
-tomli = {version = "*", markers = "python_version < \"3.11\""}
[package.extras]
dev = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs[fs] (>=1.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-asyncio", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist", "sphinx", "sphinx-gallery (>=0.8)"]
@@ -1995,6 +1973,38 @@ dev = ["changelist (==0.5)"]
lint = ["pre-commit (==3.7.0)"]
test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
+[[package]]
+name = "llvmlite"
+version = "0.45.1"
+description = "lightweight wrapper around basic LLVM functionality"
+optional = false
+python-versions = ">=3.10"
+groups = ["dev"]
+markers = "python_version < \"3.13\""
+files = [
+ {file = "llvmlite-0.45.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:1b1af0c910af0978aa55fa4f60bbb3e9f39b41e97c2a6d94d199897be62ba07a"},
+ {file = "llvmlite-0.45.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02a164db2d79088bbd6e0d9633b4fe4021d6379d7e4ac7cc85ed5f44b06a30c5"},
+ {file = "llvmlite-0.45.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f2d47f34e4029e6df3395de34cc1c66440a8d72712993a6e6168db228686711b"},
+ {file = "llvmlite-0.45.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7319e5f9f90720578a7f56fbc805bdfb4bc071b507c7611f170d631c3c0f1e0"},
+ {file = "llvmlite-0.45.1-cp310-cp310-win_amd64.whl", hash = "sha256:4edb62e685867799e336723cb9787ec6598d51d0b1ed9af0f38e692aa757e898"},
+ {file = "llvmlite-0.45.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:60f92868d5d3af30b4239b50e1717cb4e4e54f6ac1c361a27903b318d0f07f42"},
+ {file = "llvmlite-0.45.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98baab513e19beb210f1ef39066288784839a44cd504e24fff5d17f1b3cf0860"},
+ {file = "llvmlite-0.45.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3adc2355694d6a6fbcc024d59bb756677e7de506037c878022d7b877e7613a36"},
+ {file = "llvmlite-0.45.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2f3377a6db40f563058c9515dedcc8a3e562d8693a106a28f2ddccf2c8fcf6ca"},
+ {file = "llvmlite-0.45.1-cp311-cp311-win_amd64.whl", hash = "sha256:f9c272682d91e0d57f2a76c6d9ebdfccc603a01828cdbe3d15273bdca0c3363a"},
+ {file = "llvmlite-0.45.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:28e763aba92fe9c72296911e040231d486447c01d4f90027c8e893d89d49b20e"},
+ {file = "llvmlite-0.45.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a53f4b74ee9fd30cb3d27d904dadece67a7575198bd80e687ee76474620735f"},
+ {file = "llvmlite-0.45.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b3796b1b1e1c14dcae34285d2f4ea488402fbd2c400ccf7137603ca3800864f"},
+ {file = "llvmlite-0.45.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:779e2f2ceefef0f4368548685f0b4adde34e5f4b457e90391f570a10b348d433"},
+ {file = "llvmlite-0.45.1-cp312-cp312-win_amd64.whl", hash = "sha256:9e6c9949baf25d9aa9cd7cf0f6d011b9ca660dd17f5ba2b23bdbdb77cc86b116"},
+ {file = "llvmlite-0.45.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:d9ea9e6f17569a4253515cc01dade70aba536476e3d750b2e18d81d7e670eb15"},
+ {file = "llvmlite-0.45.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:c9f3cadee1630ce4ac18ea38adebf2a4f57a89bd2740ce83746876797f6e0bfb"},
+ {file = "llvmlite-0.45.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:57c48bf2e1083eedbc9406fb83c4e6483017879714916fe8be8a72a9672c995a"},
+ {file = "llvmlite-0.45.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3aa3dfceda4219ae39cf18806c60eeb518c1680ff834b8b311bd784160b9ce40"},
+ {file = "llvmlite-0.45.1-cp313-cp313-win_amd64.whl", hash = "sha256:080e6f8d0778a8239cd47686d402cb66eb165e421efa9391366a9b7e5810a38b"},
+ {file = "llvmlite-0.45.1.tar.gz", hash = "sha256:09430bb9d0bb58fc45a45a57c7eae912850bedc095cd0810a57de109c69e1c32"},
+]
+
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -2590,7 +2600,6 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
@@ -2864,7 +2873,6 @@ files = [
[package.dependencies]
pastel = ">=0.2.1,<0.3.0"
pyyaml = ">=6.0.2,<7.0"
-tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
[package.extras]
poetry-plugin = ["poetry (>=1.2.0,<3.0.0) ; python_version < \"4.0\""]
@@ -3110,12 +3118,10 @@ files = [
[package.dependencies]
colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
-exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""}
iniconfig = ">=1.0.1"
packaging = ">=22"
pluggy = ">=1.5,<2"
pygments = ">=2.7.2"
-tomli = {version = ">=1", markers = "python_version < \"3.11\""}
[package.extras]
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
@@ -3517,7 +3523,6 @@ description = "Manipulate well-formed Roman numerals"
optional = false
python-versions = ">=3.9"
groups = ["docs"]
-markers = "python_version >= \"3.11\""
files = [
{file = "roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c"},
{file = "roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d"},
@@ -3690,71 +3695,6 @@ docs = ["PyWavelets (>=1.6)", "dask[array] (>=2023.2.0)", "intersphinx-registry
optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=1.1.1)", "dask[array] (>=2023.2.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"]
test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=8)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]
-[[package]]
-name = "scipy"
-version = "1.15.3"
-description = "Fundamental algorithms for scientific computing in Python"
-optional = false
-python-versions = ">=3.10"
-groups = ["main", "dev"]
-markers = "python_version == \"3.10\""
-files = [
- {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"},
- {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"},
- {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f"},
- {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92"},
- {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82"},
- {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40"},
- {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e"},
- {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c"},
- {file = "scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13"},
- {file = "scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b"},
- {file = "scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba"},
- {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65"},
- {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1"},
- {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889"},
- {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982"},
- {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9"},
- {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594"},
- {file = "scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb"},
- {file = "scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019"},
- {file = "scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6"},
- {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477"},
- {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c"},
- {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45"},
- {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49"},
- {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e"},
- {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539"},
- {file = "scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed"},
- {file = "scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759"},
- {file = "scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62"},
- {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb"},
- {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730"},
- {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825"},
- {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7"},
- {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11"},
- {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126"},
- {file = "scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163"},
- {file = "scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8"},
- {file = "scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5"},
- {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e"},
- {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb"},
- {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723"},
- {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb"},
- {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4"},
- {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5"},
- {file = "scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca"},
- {file = "scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf"},
-]
-
-[package.dependencies]
-numpy = ">=1.23.5,<2.5"
-
-[package.extras]
-dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"]
-doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"]
-test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
-
[[package]]
name = "scipy"
version = "1.16.0"
@@ -3801,7 +3741,7 @@ files = [
{file = "scipy-1.16.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f56296fefca67ba605fd74d12f7bd23636267731a72cb3947963e76b8c0a25db"},
{file = "scipy-1.16.0.tar.gz", hash = "sha256:b5ef54021e832869c8cfb03bc3bf20366cbcd426e02a58e8a58d7584dfbb8f62"},
]
-markers = {main = "python_version >= \"3.11\"", dev = "python_version < \"3.13\" and python_version >= \"3.11\""}
+markers = {dev = "python_version < \"3.13\""}
[package.dependencies]
numpy = ">=1.25.2,<2.6"
@@ -3920,43 +3860,6 @@ files = [
{file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"},
]
-[[package]]
-name = "sphinx"
-version = "8.1.3"
-description = "Python documentation generator"
-optional = false
-python-versions = ">=3.10"
-groups = ["docs"]
-markers = "python_version == \"3.10\""
-files = [
- {file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"},
- {file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"},
-]
-
-[package.dependencies]
-alabaster = ">=0.7.14"
-babel = ">=2.13"
-colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
-docutils = ">=0.20,<0.22"
-imagesize = ">=1.3"
-Jinja2 = ">=3.1"
-packaging = ">=23.0"
-Pygments = ">=2.17"
-requests = ">=2.30.0"
-snowballstemmer = ">=2.2"
-sphinxcontrib-applehelp = ">=1.0.7"
-sphinxcontrib-devhelp = ">=1.0.6"
-sphinxcontrib-htmlhelp = ">=2.0.6"
-sphinxcontrib-jsmath = ">=1.0.1"
-sphinxcontrib-qthelp = ">=1.0.6"
-sphinxcontrib-serializinghtml = ">=1.1.9"
-tomli = {version = ">=2", markers = "python_version < \"3.11\""}
-
-[package.extras]
-docs = ["sphinxcontrib-websupport"]
-lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"]
-test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
-
[[package]]
name = "sphinx"
version = "8.2.3"
@@ -3964,7 +3867,6 @@ description = "Python documentation generator"
optional = false
python-versions = ">=3.11"
groups = ["docs"]
-markers = "python_version >= \"3.11\""
files = [
{file = "sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3"},
{file = "sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348"},
@@ -4319,19 +4221,6 @@ webencodings = ">=0.4"
doc = ["sphinx", "sphinx_rtd_theme"]
test = ["pytest", "ruff"]
-[[package]]
-name = "tomli"
-version = "2.0.1"
-description = "A lil' TOML parser"
-optional = false
-python-versions = ">=3.7"
-groups = ["dev", "docs"]
-markers = "python_version == \"3.10\""
-files = [
- {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
- {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
-]
-
[[package]]
name = "tornado"
version = "6.5.1"
@@ -4393,7 +4282,7 @@ files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
-markers = {main = "python_version < \"3.12\""}
+markers = {main = "python_version == \"3.11\""}
[[package]]
name = "tzdata"
@@ -4527,5 +4416,5 @@ test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.funct
[metadata]
lock-version = "2.1"
-python-versions = ">=3.10,<3.14"
-content-hash = "e9942c2c0e0ae6e1750f042416105fb7d2cd2287a930aef2b98f36542bad99b5"
+python-versions = ">=3.11,<3.14"
+content-hash = "36ea2d832a003c383766c71a850c800ee70edca5f1b72acc3cc2d7546c35e0ec"
diff --git a/pyproject.toml b/pyproject.toml
index 33b3376..5daa0c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,14 +15,14 @@ packages = [ { include = "cytodataframe", from = "src" } ]
poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = [ "plugin" ] }
[tool.poetry.dependencies]
-python = ">=3.10,<3.14"
+python = ">=3.11,<3.14"
# used for data management
pandas = [
{ version = "<2.2.2", python = "<3.9" },
{ version = "^2.2.2", python = ">=3.9" },
]
# used for data ingest and export
-pyarrow = ">=16,<21"
+pyarrow = ">=16"
# used for environment detection
ipython = "^8.12.3"
# used for image processing
@@ -56,7 +56,7 @@ isort = ">=5.13.2,<8.0.0"
jupyterlab-code-formatter = "^3.0.2"
duckdb = "^1.1.3"
# version specifications below used to help surpass 3.13-based installations
-cosmicqc = { version = "^0.1.4", markers = "python_version < '3.13' and python_version >= '3.10'" }
+cosmicqc = { version = ">=1.0.1", markers = "python_version < '3.13' and python_version >= '3.10'" }
poethepoet = "^0.37.0"
[tool.poetry.group.docs.dependencies]
@@ -116,6 +116,7 @@ lint.select = [
]
# Ignore `E402` and `F401` (unused imports) in all `__init__.py` files
lint.per-file-ignores."__init__.py" = [ "E402", "F401" ]
+lint.per-file-ignores."src/cytodataframe/*.py" = [ "ANN401", "PLC0415" ]
lint.per-file-ignores."src/cytodataframe/image.py" = [ "PLR2004" ]
# ignore typing rules for tests
lint.per-file-ignores."tests/*" = [ "ANN201", "PLR0913", "PLR2004", "SIM105" ]
diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py
index 1581a95..246e888 100644
--- a/src/cytodataframe/frame.py
+++ b/src/cytodataframe/frame.py
@@ -7,6 +7,8 @@
import pathlib
import re
import sys
+import tempfile
+import uuid
import warnings
from io import BytesIO, StringIO
from typing import (
@@ -285,7 +287,7 @@ def __init__( # noqa: PLR0913
# instead of Pandas DataFrames.
self._wrap_methods()
- def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # noqa: ANN401
+ def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any:
"""
Returns an element or a slice of the underlying pandas DataFrame.
@@ -330,7 +332,7 @@ def _return_cytodataframe(
method_name: str,
*args: Tuple[Any, ...],
**kwargs: Dict[str, Any],
- ) -> Any: # noqa: ANN401
+ ) -> Any:
"""
Wraps a given method to ensure that the returned result
is an CytoDataFrame if applicable.
@@ -400,7 +402,7 @@ def _wrap_method(self: CytoDataFrame_type, method_name: str) -> Callable:
the result is a CytoDataFrame.
"""
- def wrapper(*args: Tuple[Any, ...], **kwargs: Dict[str, Any]) -> Any: # noqa: ANN401
+ def wrapper(*args: Tuple[Any, ...], **kwargs: Dict[str, Any]) -> Any:
"""
Wraps the specified method to ensure
it returns a CytoDataFrame.
@@ -639,6 +641,283 @@ def export(
else:
raise ValueError("Unsupported file format for export.")
+ def to_ome_parquet( # noqa: PLR0915, PLR0912, C901
+ self: CytoDataFrame_type,
+ file_path: Union[str, pathlib.Path],
+ arrow_column_suffix: str = "_OMEArrow",
+ include_original: bool = True,
+ include_mask_outline: bool = True,
+ include_composite: bool = True,
+ **kwargs: Dict[str, Any],
+ ) -> None:
+ """Export the dataframe with cropped images encoded as OMEArrow structs."""
+
+ try:
+ from ome_arrow import OMEArrow # type: ignore
+ except ImportError as exc:
+ raise ImportError(
+ "CytoDataFrame.to_ome_parquet requires the optional 'ome-arrow' "
+ "dependency. Install it via `pip install ome-arrow`."
+ ) from exc
+
+ try:
+ import importlib.metadata as importlib_metadata
+ except ImportError: # pragma: no cover
+ import importlib_metadata # type: ignore
+
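+ # capture the installed ome-arrow version so it can be recorded as
+ # provenance metadata in the exported Parquet schema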
+ try:
+ ome_arrow_version = importlib_metadata.version("ome-arrow")
+ except importlib_metadata.PackageNotFoundError:
+ module = sys.modules.get("ome_arrow")
+ ome_arrow_version = getattr(module, "__version__", None)
+
+ if not any((include_original, include_mask_outline, include_composite)):
+ raise ValueError(
+ "At least one of include_original, include_mask_outline, or "
+ "include_composite must be True."
+ )
+
+ image_cols = self.find_image_columns() or []
+ if not image_cols:
+ logger.debug(
+ "No image filename columns detected. Falling back to to_parquet()."
+ )
+ self.to_parquet(file_path, **kwargs)
+ return
+
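+ # bounding box coordinates are required to crop each object's image
+ # region before it is encoded as an OME-Arrow struct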
+ bounding_box_df = self._custom_attrs.get("data_bounding_box")
+ if bounding_box_df is None:
+ raise ValueError(
+ "to_ome_parquet requires bounding box metadata to crop images."
+ )
+
+ bounding_box_cols = bounding_box_df.columns.tolist()
+ bbox_column_map = {
+ "x_min": next(
+ (col for col in bounding_box_cols if "Minimum_X" in str(col)), None
+ ),
+ "y_min": next(
+ (col for col in bounding_box_cols if "Minimum_Y" in str(col)), None
+ ),
+ "x_max": next(
+ (col for col in bounding_box_cols if "Maximum_X" in str(col)), None
+ ),
+ "y_max": next(
+ (col for col in bounding_box_cols if "Maximum_Y" in str(col)), None
+ ),
+ }
+
+ if any(value is None for value in bbox_column_map.values()):
+ raise ValueError(
+ "Unable to identify all bounding box coordinate columns for export."
+ )
+
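+ # operate on a copy so bounding box, compartment center, and image
+ # path columns can be joined for export and dropped again afterwards
+ # without mutating this CytoDataFrame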
+ working_df = self.copy()
+
+ missing_bbox_cols = [
+ col for col in bounding_box_cols if col not in working_df.columns
+ ]
+ if missing_bbox_cols:
+ working_df = working_df.join(bounding_box_df[missing_bbox_cols])
+
+ comp_center_df = self._custom_attrs.get("compartment_center_xy")
+ comp_center_cols: List[str] = []
+ missing_comp_cols: List[str] = []
+ if comp_center_df is not None:
+ comp_center_cols = comp_center_df.columns.tolist()
+ missing_comp_cols = [
+ col for col in comp_center_cols if col not in working_df.columns
+ ]
+ if missing_comp_cols:
+ working_df = working_df.join(comp_center_df[missing_comp_cols])
+
+ image_path_df = self._custom_attrs.get("data_image_paths")
+ missing_path_cols: List[str] = []
+ if image_path_df is not None:
+ image_path_cols_all = image_path_df.columns.tolist()
+ missing_path_cols = [
+ col for col in image_path_cols_all if col not in working_df.columns
+ ]
+ if missing_path_cols:
+ working_df = working_df.join(image_path_df[missing_path_cols])
+
+ all_cols_str, all_cols_back = self._normalize_labels(working_df.columns)
+ image_cols_str = [str(col) for col in image_cols]
+ image_path_cols_str = self.find_image_path_columns(
+ image_cols=image_cols_str, all_cols=all_cols_str
+ )
+ image_path_cols = {}
+ for image_col in image_cols:
+ key = str(image_col)
+ if key in image_path_cols_str:
+ mapped_col = image_path_cols_str[key]
+ image_path_cols[image_col] = all_cols_back.get(
+ str(mapped_col), mapped_col
+ )
+
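+ # compartment center coordinates, when available, are forwarded to
+ # the cropping step alongside each bounding box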
+ comp_center_x = next((col for col in comp_center_cols if "X" in str(col)), None)
+ comp_center_y = next((col for col in comp_center_cols if "Y" in str(col)), None)
+
+ kwargs.setdefault("engine", "pyarrow")
+
+ from cytodataframe import __version__ as cytodataframe_version
+
+ metadata = {
+ "cytodataframe:data-producer": "https://github.com/cytomining/CytoDataFrame",
+ "cytodataframe:data-producer-version": cytodataframe_version,
+ }
+ if ome_arrow_version is not None:
+ metadata["cytodataframe:ome-arrow-version"] = ome_arrow_version
+
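+ # each requested layer is cropped, written to a temporary TIFF, and
+ # re-read through OMEArrow to produce the struct stored in the table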
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmpdir_path = pathlib.Path(tmpdir)
+ for image_col in image_cols:
+ image_path_col = image_path_cols.get(image_col)
+
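+ # one output column per requested layer, suffixed _ORIG (original),
+ # _LABL (mask/outline), and _COMP (composite)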
+ layer_configs: List[Tuple[str, str]] = []
+ if include_original:
+ layer_configs.append(
+ ("original", f"{image_col}{arrow_column_suffix}_ORIG")
+ )
+ if include_mask_outline:
+ layer_configs.append(
+ ("mask", f"{image_col}{arrow_column_suffix}_LABL")
+ )
+ if include_composite:
+ layer_configs.append(
+ ("composite", f"{image_col}{arrow_column_suffix}_COMP")
+ )
+
+ column_values = {col_name: [] for _, col_name in layer_configs}
+
+ for _, row in working_df.iterrows():
+ image_value = row.get(image_col)
+ if image_value is None or pd.isna(image_value):
+ for _, col_name in layer_configs:
+ column_values[col_name].append(None)
+ continue
+
+ try:
+ bbox_values = (
+ row[bbox_column_map["x_min"]],
+ row[bbox_column_map["y_min"]],
+ row[bbox_column_map["x_max"]],
+ row[bbox_column_map["y_max"]],
+ )
+ except KeyError:
+ for _, col_name in layer_configs:
+ column_values[col_name].append(None)
+ continue
+
+ if any(pd.isna(value) for value in bbox_values):
+ for _, col_name in layer_configs:
+ column_values[col_name].append(None)
+ continue
+
+ bounding_box = tuple(int(value) for value in bbox_values)
+
+ compartment_center = None
+ if comp_center_x and comp_center_y:
+ center_vals = (row.get(comp_center_x), row.get(comp_center_y))
+ if not any(val is None or pd.isna(val) for val in center_vals):
+ compartment_center = tuple(int(v) for v in center_vals)
+
+ image_path_value = (
+ row.get(image_path_col) if image_path_col is not None else None
+ )
+
+ layers = self._prepare_cropped_image_layers(
+ data_value=image_value,
+ bounding_box=bounding_box,
+ compartment_center_xy=compartment_center,
+ image_path=image_path_value,
+ include_original=include_original,
+ include_mask_outline=include_mask_outline,
+ include_composite=include_composite,
+ )
+
+ sanitized_col = re.sub(r"[^A-Za-z0-9_.-]", "_", str(image_col))
+
+ for layer_key, col_name in layer_configs:
+ layer_array = layers.get(layer_key)
+ if layer_array is None:
+ column_values[col_name].append(None)
+ continue
+
+ temp_path = (
+ tmpdir_path
+ / f"{sanitized_col}_{layer_key}_{uuid.uuid4().hex}.tiff"
+ )
+ try:
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", UserWarning)
+ imageio.imwrite(temp_path, layer_array, format="tiff")
+ except Exception as exc:
+ logger.error(
+ "Failed to write temporary TIFF for OMEArrow (%s): %s",
+ layer_key,
+ exc,
+ )
+ column_values[col_name].append(None)
+ continue
+ try:
+ ome_struct = OMEArrow(data=str(temp_path)).data
+ if hasattr(ome_struct, "as_py"):
+ ome_struct = ome_struct.as_py()
+ except Exception as exc:
+ logger.error(
+ "Failed to create OMEArrow struct for %s: %s",
+ layer_key,
+ exc,
+ )
+ column_values[col_name].append(None)
+ continue
+ column_values[col_name].append(ome_struct)
+
+ for _, col_name in layer_configs:
+ working_df[col_name] = column_values[col_name]
+
+ if missing_bbox_cols:
+ working_df = working_df.drop(columns=missing_bbox_cols)
+
+ if missing_comp_cols:
+ working_df = working_df.drop(columns=missing_comp_cols)
+
+ if missing_path_cols:
+ working_df = working_df.drop(columns=missing_path_cols)
+
+ final_kwargs = kwargs.copy()
+ engine = final_kwargs.pop("engine", None)
+ existing_metadata = final_kwargs.pop("metadata", {}) or {}
+ merged_metadata = {**metadata, **existing_metadata}
+
+ index_arg = final_kwargs.pop("index", None)
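+    # embed the merged metadata into the parquet schema via pyarrow;
+    # fall back to pandas' to_parquet when there is nothing to embed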
+ if merged_metadata:
+ import pyarrow as pa
+ import pyarrow.parquet as pq
+
+ table = pa.Table.from_pandas(
+ working_df,
+ preserve_index=True if index_arg is None else index_arg,
+ )
+ existing = table.schema.metadata or {}
+ new_metadata = {
+ **existing,
+ **{
+ str(k).encode(): str(v).encode()
+ for k, v in merged_metadata.items()
+ if v is not None
+ },
+ }
+ table = table.replace_schema_metadata(new_metadata)
+ pq.write_table(table, file_path, **final_kwargs)
+ else:
+ if index_arg is not None:
+ final_kwargs["index"] = index_arg
+ if engine is not None:
+ final_kwargs["engine"] = engine
+ working_df.to_parquet(file_path, **final_kwargs)
+
@staticmethod
def is_notebook_or_lab() -> bool:
"""
@@ -703,6 +982,33 @@ def find_image_columns(self: CytoDataFrame_type) -> List[str]:
return image_cols
+ @staticmethod
+ def _is_ome_arrow_value(value: Any) -> bool:
+ """Check whether a value looks like an OME-Arrow struct."""
+
+ return (
+ isinstance(value, dict)
+ and value.get("type") == "ome.arrow"
+ and value.get("planes") is not None
+ and value.get("pixels_meta") is not None
+ )
+
+ def find_ome_arrow_columns(
+ self: CytoDataFrame_type, data: pd.DataFrame
+ ) -> List[str]:
+ """Identify columns that contain OME-Arrow structs."""
+
+ ome_cols: List[str] = []
+ for column in data.columns:
+ series = data[column]
+ if series.apply(self._is_ome_arrow_value).any():
+ ome_cols.append(column)
+
+ if ome_cols:
+ logger.debug("Found OME-Arrow columns: %s", ome_cols)
+
+ return ome_cols
+
def get_image_paths_from_data(
self: CytoDataFrame_type, image_cols: List[str]
) -> Dict[str, str]:
@@ -771,7 +1077,7 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901
candidate_path: pathlib.Path,
orig_image: np.ndarray,
mask: bool = True,
- ) -> np.ndarray:
+ ) -> Tuple[Optional[np.ndarray], Optional[pathlib.Path]]:
"""
Search for a mask or outline image file based on the
provided patterns and apply it to the target image.
@@ -805,7 +1111,7 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901
if file_dir is None:
logger.debug("No mask or outline directory specified.")
- return None
+ return None, None
if pattern_map is None:
matching_mask_file = list(
@@ -823,18 +1129,24 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901
outline_color = display_options.get("outline_color", (0, 255, 0))
if mask:
- return draw_outline_on_image_from_mask(
- orig_image=orig_image,
- mask_image_path=matching_mask_file[0],
- outline_color=outline_color,
+ return (
+ draw_outline_on_image_from_mask(
+ orig_image=orig_image,
+ mask_image_path=matching_mask_file[0],
+ outline_color=outline_color,
+ ),
+ matching_mask_file[0],
)
else:
- return draw_outline_on_image_from_outline(
- orig_image=orig_image,
- outline_image_path=matching_mask_file[0],
- outline_color=outline_color,
+ return (
+ draw_outline_on_image_from_outline(
+ orig_image=orig_image,
+ outline_image_path=matching_mask_file[0],
+ outline_color=outline_color,
+ ),
+ matching_mask_file[0],
)
- return None
+ return None, None
for file_pattern, original_pattern in pattern_map.items():
if re.search(original_pattern, data_value):
@@ -856,55 +1168,114 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901
# gather the outline color if specified
outline_color = display_options.get("outline_color", (0, 255, 0))
if mask:
- return draw_outline_on_image_from_mask(
- orig_image=orig_image,
- mask_image_path=matching_files[0],
- outline_color=outline_color,
+ return (
+ draw_outline_on_image_from_mask(
+ orig_image=orig_image,
+ mask_image_path=matching_files[0],
+ outline_color=outline_color,
+ ),
+ matching_files[0],
)
else:
- return draw_outline_on_image_from_outline(
- orig_image=orig_image,
- outline_image_path=matching_files[0],
- outline_color=outline_color,
+ return (
+ draw_outline_on_image_from_outline(
+ orig_image=orig_image,
+ outline_image_path=matching_files[0],
+ outline_color=outline_color,
+ ),
+ matching_files[0],
)
logger.debug("No mask or outline found for: %s", data_value)
- return None
+ return None, None
- def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
+ def _extract_array_from_ome_arrow( # noqa: PLR0911
self: CytoDataFrame_type,
- data_value: Any, # noqa: ANN401
+ data_value: Any,
+ ) -> Optional[np.ndarray]:
+ """Convert an OME-Arrow struct (dict) into an ndarray."""
+
+ if not self._is_ome_arrow_value(data_value):
+ return None
+
+ try:
+ pixels_meta = data_value.get("pixels_meta", {})
+ size_x = int(pixels_meta.get("size_x"))
+ size_y = int(pixels_meta.get("size_y"))
+ planes = data_value.get("planes")
+
+ if size_x <= 0 or size_y <= 0 or planes is None:
+ return None
+
+ if isinstance(planes, np.ndarray):
+ plane_entries = planes.tolist()
+ else:
+ plane_entries = list(planes)
+
+ if not plane_entries:
+ return None
+
+ plane = plane_entries[0]
+ pixels = plane.get("pixels")
+ if pixels is None:
+ return None
+
+ np_pixels = np.asarray(pixels)
+ base = size_x * size_y
+ if base <= 0 or np_pixels.size == 0 or np_pixels.size % base != 0:
+ return None
+
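+            # derive the channel count from the flattened pixel buffer and
+            # reshape to (Y, X) or (Y, X, C)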
+ channel_count = np_pixels.size // base
+ if channel_count == 1:
+ array = np_pixels.reshape((size_y, size_x))
+ else:
+ array = np_pixels.reshape((size_y, size_x, channel_count))
+
+ return self._ensure_uint8(array)
+ except Exception as exc:
+ logger.debug("Unable to decode OME-Arrow struct: %s", exc)
+ return None
+
+ @staticmethod
+ def _ensure_uint8(array: np.ndarray) -> np.ndarray:
+ """Convert the provided array to uint8 without unnecessary warnings."""
+
+ arr = np.asarray(array)
+ if np.issubdtype(arr.dtype, np.integer):
+ min_val = arr.min(initial=0)
+ max_val = arr.max(initial=0)
+ if 0 <= min_val <= 255 and 0 <= max_val <= 255: # noqa: PLR2004
+ return arr.astype(np.uint8, copy=False)
+ return img_as_ubyte(arr)
+
+ def _prepare_cropped_image_layers( # noqa: C901, PLR0915, PLR0912, PLR0913
+ self: CytoDataFrame_type,
+ data_value: Any,
bounding_box: Tuple[int, int, int, int],
compartment_center_xy: Optional[Tuple[int, int]] = None,
image_path: Optional[str] = None,
- ) -> str:
- """
- Process the image data based on the provided data value
- and bounding box, applying masks or outlines where
- applicable, and return an HTML representation of the
- cropped image for display.
-
- Args:
- data_value (Any):
- The value to search for in the file system or as the image data.
- bounding_box (Tuple[int, int, int, int]):
- The bounding box to crop the image.
- compartment_center_xy (Optional[Tuple[int, int]]):
- The center coordinates of the compartment.
- image_path (Optional[str]):
- The path to the image file.
-
- Returns:
- str:
- The HTML image display string, or the unmodified data
- value if the image cannot be processed.
- """
+ include_original: bool = False,
+ include_mask_outline: bool = False,
+ include_composite: bool = True,
+ ) -> Dict[str, Optional[np.ndarray]]:
+ """Return requested cropped image layers for downstream consumers."""
logger.debug(
(
- "Processing image data as HTML for display."
- " Data value: %s , Bounding box: %s , "
+ "Preparing cropped layers. Data value: %s, Bounding box: %s, "
"Compartment center xy: %s, Image path: %s"
),
data_value,
@@ -913,55 +1284,58 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
image_path,
)
- # stringify the data value in case it isn't a string
- data_value = str(data_value)
+ layers: Dict[str, Optional[np.ndarray]] = {}
+
+        if (array := self._extract_array_from_ome_arrow(data_value)) is not None:
+ if include_original:
+ layers["original"] = array
+ if include_mask_outline:
+ layers["mask"] = array
+ if include_composite:
+ layers["composite"] = array
+ return layers
+ data_value = str(data_value)
candidate_path = None
- # Get the pattern map for segmentation file regex
- pattern_map = self._custom_attrs.get("segmentation_file_regex")
- # Step 1: Find the candidate file if the data value is not already a file
- if not pathlib.Path(data_value).is_file():
- # determine if we have a file from the path (dir) + filename
- if (
- self._custom_attrs["data_context_dir"] is None
- and image_path is not None
- and (
- existing_image_from_path := pathlib.Path(
- f"{image_path}/{data_value}"
- )
- ).is_file()
- ):
- logger.debug(
- "Found existing image from path: %s", existing_image_from_path
- )
- candidate_path = existing_image_from_path
+ if image_path is not None and pd.isna(image_path):
+ image_path = None
- # Search for the data value in the data context directory
- elif self._custom_attrs["data_context_dir"] is not None and (
- candidate_paths := list(
- pathlib.Path(self._custom_attrs["data_context_dir"]).rglob(
- data_value
- )
- )
- ):
- logger.debug(
- "Found candidate paths (and attempting to use the first): %s",
- candidate_paths,
- )
- # If a candidate file is found, use the first one
- candidate_path = candidate_paths[0]
+ pattern_map = self._custom_attrs.get("segmentation_file_regex")
- else:
- logger.debug("No candidate file found for: %s", data_value)
- # If no candidate file is found, return the original data value
- return data_value
+ provided_path = pathlib.Path(data_value)
+ if provided_path.is_file():
+ candidate_path = provided_path
+ elif (
+ self._custom_attrs["data_context_dir"] is None
+ and image_path is not None
+ and (
+ existing_image_from_path := pathlib.Path(image_path)
+ / pathlib.Path(data_value)
+ ).is_file()
+ ):
+ logger.debug("Found existing image from path: %s", existing_image_from_path)
+ candidate_path = existing_image_from_path
+ elif self._custom_attrs["data_context_dir"] is not None and (
+ candidate_paths := list(
+ pathlib.Path(self._custom_attrs["data_context_dir"]).rglob(data_value)
+ )
+ ):
+ logger.debug(
+ "Found candidate paths (and attempting to use the first): %s",
+ candidate_paths,
+ )
+ candidate_path = candidate_paths[0]
+ else:
+ logger.debug("No candidate file found for: %s", data_value)
+ return layers
- # read the image as an array
- orig_image_array = imageio.imread(candidate_path)
+ try:
+ orig_image_array = imageio.imread(candidate_path)
+ except (FileNotFoundError, ValueError) as exc:
+ logger.error(exc)
+ return layers
- # Adjust the image with image adjustment callable
- # or adaptive histogram equalization
if self._custom_attrs["image_adjustment"] is not None:
logger.debug("Adjusting image with custom image adjustment function.")
orig_image_array = self._custom_attrs["image_adjustment"](
@@ -974,12 +1348,11 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
brightness=self._custom_attrs["_widget_state"]["scale"],
)
- # Normalize to 0-255 for image saving
- orig_image_array = img_as_ubyte(orig_image_array)
+ orig_image_array = self._ensure_uint8(orig_image_array)
+
+ original_image_copy = orig_image_array.copy() if include_original else None
- prepared_image = None
- # Step 2: Search for a mask
- prepared_image = self.search_for_mask_or_outline(
+ prepared_image, mask_source_path = self.search_for_mask_or_outline(
data_value=data_value,
pattern_map=pattern_map,
file_dir=self._custom_attrs["data_mask_context_dir"],
@@ -988,10 +1361,8 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
mask=True,
)
- # If no mask is found, proceed to search for an outline
if prepared_image is None:
- # Step 3: Search for an outline if no mask was found
- prepared_image = self.search_for_mask_or_outline(
+ prepared_image, mask_source_path = self.search_for_mask_or_outline(
data_value=data_value,
pattern_map=pattern_map,
file_dir=self._custom_attrs["data_outline_context_dir"],
@@ -1000,11 +1371,27 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
mask=False,
)
- # Step 4: If neither mask nor outline is found, use the original image array
if prepared_image is None:
prepared_image = orig_image_array
- # Step 5: Add a red dot for the compartment center before cropping
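+        # when a raw mask/outline layer is requested, reload the matched
+        # file and binarize it to a 0/255 uint8 array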
+ mask_source_array = None
+ if include_mask_outline and mask_source_path is not None:
+ try:
+ loaded_mask = imageio.imread(mask_source_path)
+ if loaded_mask.ndim == 3: # noqa: PLR2004
+ mask_gray = np.max(loaded_mask[..., :3], axis=2)
+ else:
+ mask_gray = loaded_mask
+ mask_binary = mask_gray > 0
+ mask_uint8 = np.zeros(mask_binary.shape, dtype=np.uint8)
+ mask_uint8[mask_binary] = 255
+ mask_source_array = mask_uint8
+ except (FileNotFoundError, ValueError) as exc:
+ logger.error(
+ "Unable to read mask/outline image %s: %s", mask_source_path, exc
+ )
+ mask_source_array = None
+
if (
compartment_center_xy is not None
and self._custom_attrs.get("display_options", None) is None
@@ -1012,10 +1399,8 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
self._custom_attrs.get("display_options", None) is not None
and self._custom_attrs["display_options"].get("center_dot", True)
):
- center_x, center_y = map(int, compartment_center_xy) # Ensure integers
+ center_x, center_y = map(int, compartment_center_xy)
- # Convert grayscale image to RGB if necessary
- # Check if the image is grayscale
if len(prepared_image.shape) == 2: # noqa: PLR2004
prepared_image = skimage.color.gray2rgb(prepared_image)
@@ -1023,70 +1408,63 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
0 <= center_y < prepared_image.shape[0]
and 0 <= center_x < prepared_image.shape[1]
):
- # Calculate the radius as a fraction of the bounding box size
x_min, y_min, x_max, y_max = map(int, bounding_box)
box_width = x_max - x_min
box_height = y_max - y_min
- radius = max(
- 1, int(min(box_width, box_height) * 0.03)
- ) # 3% of the smaller dimension
+ radius = max(1, int(min(box_width, box_height) * 0.03))
rr, cc = skimage.draw.disk(
(center_y, center_x), radius=radius, shape=prepared_image.shape[:2]
)
- prepared_image[rr, cc] = [255, 0, 0] # Red color in RGB
+ prepared_image[rr, cc] = [255, 0, 0]
- # Step 6: Crop the image based on the bounding box and encode it to PNG format
try:
- # set a default bounding box
x_min, y_min, x_max, y_max = map(int, bounding_box)
- # if we have custom offset bounding box information, use it
if self._custom_attrs.get("display_options", None) and self._custom_attrs[
"display_options"
].get("offset_bounding_box", None):
- try:
- # note: this will default to the nuclei centers based
- # on earlier input for this parameter.
- center_x, center_y = map(int, compartment_center_xy)
+ center_x, center_y = map(int, compartment_center_xy)
- offset_bounding_box = self._custom_attrs["display_options"].get(
- "offset_bounding_box"
- )
- # generate offset bounding box positions
- x_min, y_min, x_max, y_max = get_pixel_bbox_from_offsets(
- center_x=center_x,
- center_y=center_y,
- rel_bbox=(
- offset_bounding_box["x_min"],
- offset_bounding_box["y_min"],
- offset_bounding_box["x_max"],
- offset_bounding_box["y_max"],
- ),
- )
- except IndexError:
+ offset_bounding_box = self._custom_attrs["display_options"].get(
+ "offset_bounding_box"
+ )
+ x_min, y_min, x_max, y_max = get_pixel_bbox_from_offsets(
+ center_x=center_x,
+ center_y=center_y,
+ rel_bbox=(
+ offset_bounding_box["x_min"],
+ offset_bounding_box["y_min"],
+ offset_bounding_box["x_max"],
+ offset_bounding_box["y_max"],
+ ),
+ )
+
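+        # crop the prepared (possibly annotated) image; the untouched
+        # original is cropped with the same box when requested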
+ cropped_img_array = prepared_image[y_min:y_max, x_min:x_max]
+
+ cropped_original = (
+ original_image_copy[y_min:y_max, x_min:x_max]
+ if include_original and original_image_copy is not None
+ else None
+ )
+ if include_mask_outline and mask_source_array is not None:
+ try:
+ cropped_mask = mask_source_array[y_min:y_max, x_min:x_max]
+ except Exception as exc:
logger.debug(
- (
- "Bounding box %s is out of bounds for image %s ."
- " Defaulting to use bounding box from data."
- ),
- (x_min, y_min, x_max, y_max),
- image_path,
+ "Failed to crop mask/outline array for %s: %s",
+ mask_source_path,
+ exc,
)
+ cropped_mask = None
+ else:
+ cropped_mask = None
- cropped_img_array = prepared_image[
- y_min:y_max, x_min:x_max
- ] # Perform slicing
-
- # Optionally add a scale bar to the cropped image
try:
display_options = self._custom_attrs.get("display_options", {}) or {}
scale_cfg = display_options.get("scale_bar", None)
- # Accept either a boolean (True -> use defaults) or a dict of options.
if scale_cfg:
- # microns-per-pixel can live in scale_cfg or in
- # display_options for convenience
um_per_pixel = None
if isinstance(scale_cfg, dict):
um_per_pixel = scale_cfg.get("um_per_pixel") or scale_cfg.get(
@@ -1097,7 +1475,6 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
"um_per_pixel"
) or display_options.get("pixel_size_um")
- # NEW: simple fallback for pixels_per_um / pixel_per_um (reciprocal)
if um_per_pixel is None:
ppu = None
if isinstance(scale_cfg, dict):
@@ -1114,10 +1491,9 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
if ppu > 0:
um_per_pixel = 1.0 / ppu
except (TypeError, ValueError):
- pass # ignore bad input and skip adding a scale bar
+ pass
if um_per_pixel:
- # Default knobs (you can expose more)
params = {
"length_um": 10.0,
"thickness_px": 4,
@@ -1157,46 +1533,60 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
)
},
)
- except Exception as e:
- logger.debug("Skipping scale bar due to error: %s", e)
+ except Exception as exc:
+ logger.debug("Skipping scale bar due to error: %s", exc)
- except ValueError as e:
+ except ValueError as exc:
raise ValueError(
f"Bounding box contains invalid values: {bounding_box}"
- ) from e
- except IndexError as e:
+ ) from exc
+ except IndexError as exc:
raise IndexError(
f"Bounding box {bounding_box} is out of bounds for image dimensions "
f"{prepared_image.shape}"
- ) from e
+ ) from exc
logger.debug("Cropped image array shape: %s", cropped_img_array.shape)
- # Step 7:
+ if include_composite:
+ layers["composite"] = cropped_img_array
+ if include_original:
+ layers["original"] = cropped_original
+ if include_mask_outline:
+ layers["mask"] = cropped_mask
+
+ return layers
+
+ def _prepare_cropped_image_array(
+ self: CytoDataFrame_type,
+ data_value: Any,
+ bounding_box: Tuple[int, int, int, int],
+ compartment_center_xy: Optional[Tuple[int, int]] = None,
+ image_path: Optional[str] = None,
+ ) -> Optional[np.ndarray]:
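+        """Return the composite cropped image array for a single data value."""
+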
+ layers = self._prepare_cropped_image_layers(
+ data_value=data_value,
+ bounding_box=bounding_box,
+ compartment_center_xy=compartment_center_xy,
+ image_path=image_path,
+ include_composite=True,
+ )
+ return layers.get("composite")
+
+ def _image_array_to_html(self: CytoDataFrame_type, image_array: np.ndarray) -> str:
+        """Encode an image array as an HTML <img> tag."""
+
try:
- # Save cropped image to buffer
png_bytes_io = BytesIO()
-
- # catch warnings about low contrast images and avoid displaying them
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
- imageio.imwrite(png_bytes_io, cropped_img_array, format="png")
+ imageio.imwrite(png_bytes_io, image_array, format="png")
png_bytes = png_bytes_io.getvalue()
-
except (FileNotFoundError, ValueError) as exc:
- # Handle errors if image processing fails
logger.error(exc)
- return data_value
+ raise
- logger.debug("Image processed successfully and being sent to HTML for display.")
-
- # Step 8: Return HTML image display as a base64-encoded PNG
- # we dynamically style the image so that it will be displayed based
- # on automatic or user-based settings from the display_options custom
- # attribute.
- display_options = self._custom_attrs.get("display_options", {})
- if display_options is None:
- display_options = {}
+ display_options = self._custom_attrs.get("display_options", {}) or {}
width = display_options.get("width", "300px")
height = display_options.get("height")
@@ -1212,6 +1602,80 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915
f'{base64_image_bytes}" style="{html_style_joined}"/>'
)
+ def process_ome_arrow_data_as_html_display(
+ self: CytoDataFrame_type,
+ data_value: Any,
+ ) -> str:
+        """Render an OME-Arrow struct as an HTML <img> element."""
+
+ array = self._extract_array_from_ome_arrow(data_value)
+ if array is None:
+ return data_value
+
+ try:
+ return self._image_array_to_html(array)
+ except Exception:
+ return data_value
+
+ def process_image_data_as_html_display(
+ self: CytoDataFrame_type,
+ data_value: Any,
+ bounding_box: Tuple[int, int, int, int],
+ compartment_center_xy: Optional[Tuple[int, int]] = None,
+ image_path: Optional[str] = None,
+ ) -> str:
+ """
+ Process the image data based on the provided data value
+ and bounding box, applying masks or outlines where
+ applicable, and return an HTML representation of the
+ cropped image for display.
+
+ Args:
+ data_value (Any):
+ The value to search for in the file system or as the image data.
+ bounding_box (Tuple[int, int, int, int]):
+ The bounding box to crop the image.
+ compartment_center_xy (Optional[Tuple[int, int]]):
+ The center coordinates of the compartment.
+ image_path (Optional[str]):
+ The path to the image file.
+
+ Returns:
+ str:
+ The HTML image display string, or the unmodified data
+ value if the image cannot be processed.
+ """
+
+ logger.debug(
+ (
+ "Processing image data as HTML for display."
+ " Data value: %s , Bounding box: %s , "
+ "Compartment center xy: %s, Image path: %s"
+ ),
+ data_value,
+ bounding_box,
+ compartment_center_xy,
+ image_path,
+ )
+
+ data_value = str(data_value)
+ cropped_img_array = self._prepare_cropped_image_array(
+ data_value=data_value,
+ bounding_box=bounding_box,
+ compartment_center_xy=compartment_center_xy,
+ image_path=image_path,
+ )
+
+ if cropped_img_array is None:
+ return data_value
+
+ logger.debug("Image processed successfully and being sent to HTML for display.")
+
+ try:
+ return self._image_array_to_html(cropped_img_array)
+ except Exception:
+ return data_value
+
def get_displayed_rows(self: CytoDataFrame_type) -> List[int]:
"""
Get the indices of the rows that are currently
@@ -1488,6 +1952,13 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915
self._custom_attrs["data_image_paths"].columns.tolist(), axis=1
)
+ ome_arrow_cols = self.find_ome_arrow_columns(data)
+ if ome_arrow_cols:
+ for ome_col in ome_arrow_cols:
+ data.loc[display_indices, ome_col] = data.loc[
+ display_indices, ome_col
+ ].apply(self.process_ome_arrow_data_as_html_display)
+
if self._custom_attrs["is_transposed"]:
# retranspose to return the
# data in the shape expected
@@ -1576,17 +2047,15 @@ def _repr_html_(self: CytoDataFrame_type, debug: bool = False) -> str:
# if we're in a notebook process as though in a jupyter environment
if get_option("display.notebook_repr_html") and not debug:
- # Mount the VBox (slider + output) exactly once
- if not self._custom_attrs["_widget_state"]["shown"]:
- display(
- widgets.VBox(
- [
- self._custom_attrs["_scale_slider"],
- self._custom_attrs["_output"],
- ]
- )
+ display(
+ widgets.VBox(
+ [
+ self._custom_attrs["_scale_slider"],
+ self._custom_attrs["_output"],
+ ]
)
- self._custom_attrs["_widget_state"]["shown"] = True
+ )
+ self._custom_attrs["_widget_state"]["shown"] = True
# Attach the slider observer exactly once
if not self._custom_attrs["_widget_state"]["observing"]:
diff --git a/tests/test_frame.py b/tests/test_frame.py
index 1d9f6fc..7eec24d 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -3,8 +3,12 @@
"""
import pathlib
+import sys
+import types
+import imageio.v2 as imageio
import nbformat
+import numpy as np
import pandas as pd
import pytest
from _pytest.monkeypatch import MonkeyPatch
@@ -17,6 +21,232 @@
)
+def test_to_ome_parquet_adds_arrow_column(
+ tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+ image_dir = tmp_path / "images"
+ image_dir.mkdir()
+ image_path = image_dir / "sample.tiff"
+ imageio.imwrite(image_path, np.zeros((10, 10), dtype=np.uint8))
+
+ data = pd.DataFrame(
+ {
+ "Image_FileName_DNA": [image_path.name],
+ "Image_PathName_DNA": [str(image_dir)],
+ "Cells_AreaShape_BoundingBoxMinimum_X": [0],
+ "Cells_AreaShape_BoundingBoxMinimum_Y": [0],
+ "Cells_AreaShape_BoundingBoxMaximum_X": [10],
+ "Cells_AreaShape_BoundingBoxMaximum_Y": [10],
+ }
+ )
+
+ cdf = CytoDataFrame(data=data)
+
+ class TestOMEArrow:
+ def __init__(self, data: str): # noqa: ANN204
+ self.data = data
+
+ test_module = types.SimpleNamespace(
+ OMEArrow=TestOMEArrow,
+ __version__="test",
+ __spec__=types.SimpleNamespace(loader=None),
+ )
+ monkeypatch.setitem(sys.modules, "ome_arrow", test_module)
+
+ captured: dict = {}
+
+ def fake_write_table(table, file_path, **kwargs): # noqa: ANN001, ANN202, ANN003
+ captured["df"] = table.to_pandas()
+ captured["file_path"] = file_path
+ captured["kwargs"] = kwargs
+ captured["metadata"] = table.schema.metadata or {}
+
+ monkeypatch.setattr("pyarrow.parquet.write_table", fake_write_table, raising=False)
+
+ output_path = tmp_path / "out.parquet"
+ cdf.to_ome_parquet(output_path)
+
+ composite_col = "Image_FileName_DNA_OMEArrow_COMP"
+ orig_col = "Image_FileName_DNA_OMEArrow_ORIG"
+ mask_col = "Image_FileName_DNA_OMEArrow_LABL"
+ for column in (composite_col, orig_col, mask_col):
+ assert column in captured["df"].columns
+
+ comp_value = captured["df"].loc[0, composite_col]
+ orig_value = captured["df"].loc[0, orig_col]
+ mask_value = captured["df"].loc[0, mask_col]
+
+ assert isinstance(comp_value, str) and comp_value.endswith(".tiff")
+ assert isinstance(orig_value, str) and orig_value.endswith(".tiff")
+ assert mask_value is None
+ assert captured["file_path"] == output_path
+ metadata = captured["metadata"]
+ assert metadata[b"cytodataframe:data-producer"]
+ assert metadata[b"cytodataframe:data-producer-version"]
+
+
+def test_to_ome_parquet_real_data(
+ tmp_path: pathlib.Path, cytotable_NF1_data_parquet_shrunken: str
+) -> None:
+ pytest.importorskip(
+ "ome_arrow", reason="to_ome_parquet real-data test requires ome-arrow"
+ )
+
+ parquet_path = pathlib.Path(cytotable_NF1_data_parquet_shrunken)
+ image_dir = parquet_path.parent / "Plate_2_images"
+ mask_dir = parquet_path.parent / "Plate_2_masks"
+
+ cdf = CytoDataFrame(
+ data=cytotable_NF1_data_parquet_shrunken,
+ data_context_dir=str(image_dir),
+ data_mask_context_dir=str(mask_dir),
+ )
+
+ output_path = tmp_path / "nf1.ome.parquet"
+ image_cols = cdf.find_image_columns()
+
+ cdf.to_ome_parquet(output_path)
+
+ assert output_path.exists()
+ table = parquet.read_table(output_path)
+ expected_arrow_cols = []
+ for col in image_cols:
+ expected_arrow_cols.extend(
+ [
+ f"{col}_OMEArrow_COMP",
+ f"{col}_OMEArrow_ORIG",
+ f"{col}_OMEArrow_LABL",
+ ]
+ )
+ for column in expected_arrow_cols:
+ assert column in table.column_names
+
+ mask_cols = [f"{col}_OMEArrow_LABL" for col in image_cols]
+ mask_df = table.select(mask_cols).to_pandas()
+ assert mask_df.notna().any().any()
+
+
+def test_to_ome_parquet_layer_flags(
+ tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+ image_dir = tmp_path / "images"
+ image_dir.mkdir()
+ image_path = image_dir / "sample.tiff"
+ imageio.imwrite(image_path, np.zeros((10, 10), dtype=np.uint8))
+
+ data = pd.DataFrame(
+ {
+ "Image_FileName_DNA": [image_path.name],
+ "Image_PathName_DNA": [str(image_dir)],
+ "Cells_AreaShape_BoundingBoxMinimum_X": [0],
+ "Cells_AreaShape_BoundingBoxMinimum_Y": [0],
+ "Cells_AreaShape_BoundingBoxMaximum_X": [10],
+ "Cells_AreaShape_BoundingBoxMaximum_Y": [10],
+ }
+ )
+
+ cdf = CytoDataFrame(data=data)
+
+ class TestOMEArrow:
+ def __init__(self, data: str): # noqa: ANN204
+ self.data = data
+
+ test_module = types.SimpleNamespace(
+ OMEArrow=TestOMEArrow,
+ __version__="test",
+ __spec__=types.SimpleNamespace(loader=None),
+ )
+ monkeypatch.setitem(sys.modules, "ome_arrow", test_module)
+
+ captured: dict = {}
+
+ def fake_write_table(table, file_path, **kwargs): # noqa: ANN001, ANN202, ANN003
+ captured["df"] = table.to_pandas()
+
+ monkeypatch.setattr("pyarrow.parquet.write_table", fake_write_table, raising=False)
+
+ cdf.to_ome_parquet(
+ tmp_path / "out.parquet",
+ include_original=False,
+ include_mask_outline=False,
+ include_composite=True,
+ )
+
+ columns = captured["df"].columns
+ assert "Image_FileName_DNA_OMEArrow_COMP" in columns
+ assert "Image_FileName_DNA_OMEArrow_ORIG" not in columns
+ assert "Image_FileName_DNA_OMEArrow_LABL" not in columns
+
+
+def test_ome_arrow_columns_render_html(
+ tmp_path: pathlib.Path, cytotable_NF1_data_parquet_shrunken: str
+) -> None:
+ pytest.importorskip(
+ "ome_arrow", reason="OME-Arrow rendering test requires ome-arrow"
+ )
+
+ parquet_path = pathlib.Path(cytotable_NF1_data_parquet_shrunken)
+ image_dir = parquet_path.parent / "Plate_2_images"
+ mask_dir = parquet_path.parent / "Plate_2_masks"
+
+ raw_cdf = CytoDataFrame(
+ data=cytotable_NF1_data_parquet_shrunken,
+ data_context_dir=str(image_dir),
+ data_mask_context_dir=str(mask_dir),
+ )
+
+ ome_path = tmp_path / "nf1.arrow.parquet"
+ raw_cdf.to_ome_parquet(ome_path)
+
+ arrow_cdf = CytoDataFrame(data=ome_path)
+ arrow_cols = [col for col in arrow_cdf.columns if col.endswith("_OMEArrow_COMP")]
+ assert arrow_cols
+
+ html_output = arrow_cdf[arrow_cols]._repr_html_(debug=True)
+ assert "data:image/png;base64" in html_output
+
+
+def test_prepare_layers_mask_binary(tmp_path: pathlib.Path) -> None:
+ image_array = np.zeros((6, 6), dtype=np.uint8)
+ image_path = tmp_path / "cell.tiff"
+ imageio.imwrite(image_path, image_array)
+
+ mask_array = np.zeros((6, 6, 3), dtype=np.uint8)
+ mask_array[1:4, 1:4] = (0, 255, 0)
+ mask_path = tmp_path / "cell_mask.png"
+ imageio.imwrite(mask_path, mask_array)
+
+ data = pd.DataFrame(
+ {
+ "Image_FileName_DNA": ["cell.tiff"],
+ "Image_PathName_DNA": [str(tmp_path)],
+ "Cells_AreaShape_BoundingBoxMinimum_X": [0],
+ "Cells_AreaShape_BoundingBoxMinimum_Y": [0],
+ "Cells_AreaShape_BoundingBoxMaximum_X": [6],
+ "Cells_AreaShape_BoundingBoxMaximum_Y": [6],
+ }
+ )
+
+ cdf = CytoDataFrame(
+ data=data,
+ data_context_dir=str(tmp_path),
+ data_mask_context_dir=str(tmp_path),
+ )
+
+ layers = cdf._prepare_cropped_image_layers(
+ data_value="cell.tiff",
+ bounding_box=(0, 0, 6, 6),
+ include_mask_outline=True,
+ include_original=False,
+ include_composite=False,
+ )
+
+ mask_layer = layers["mask"]
+ assert mask_layer is not None
+ assert mask_layer.dtype == np.uint8
+ assert set(np.unique(mask_layer).tolist()).issubset({0, 255})
+
+
def test_cytodataframe_input(
tmp_path: pathlib.Path,
basic_outlier_dataframe: pd.DataFrame,
@@ -399,7 +629,7 @@ def test_slider_updates_state(monkeypatch: MonkeyPatch):
widget state and triggers the render method.
"""
- # Minimal dummy dataframe
+ # Minimal test dataframe
df = pd.DataFrame({"Image_FileName_DNA": ["example.tif"]})
cdf = CytoDataFrame(df)