diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index c90c46f..45b33ab 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -38,7 +38,7 @@ jobs: run_tests: strategy: matrix: - python_version: ["3.10", "3.11", "3.12", "3.13"] + python_version: ["3.11", "3.12", "3.13"] os: [ubuntu-24.04, macos-14] runs-on: ${{ matrix.os }} env: diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index e55905d..3492abb 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -49,14 +49,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 967 ms, sys: 678 ms, total: 1.64 s\n", - "Wall time: 630 ms\n" + "CPU times: user 877 ms, sys: 542 ms, total: 1.42 s\n", + "Wall time: 585 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "509bceaad6b04a68b30b58dc6e723961", + "model_id": "90a70b1fe81949c2bfd4c58cc1f6f39d", "version_major": 2, "version_minor": 0 }, @@ -175,14 +175,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 989 ms, sys: 704 ms, total: 1.69 s\n", - "Wall time: 531 ms\n" + "CPU times: user 845 ms, sys: 567 ms, total: 1.41 s\n", + "Wall time: 482 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b4c2a53d76214ee5b4947a1da047c526", + "model_id": "cf8e8baaf3e64bd78263db0734eadcbf", "version_major": 2, "version_minor": 0 }, @@ -274,7 +274,7 @@ "source": [ "%%time\n", "# view JUMP plate BR00117006 with images and overlaid outlines for segmentation\n", - "CytoDataFrame(\n", + "frame = CytoDataFrame(\n", " data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n", " data_context_dir=f\"{jump_data_path}/images/orig\",\n", " data_outline_context_dir=f\"{jump_data_path}/images/outlines\",\n", @@ -286,7 +286,8 @@ " \"Image_FileName_OrigDNA\",\n", " \"Image_FileName_OrigRNA\",\n", " ]\n", - "][:3]" + "][:3]\n", + "frame" ] }, { @@ -299,14 +300,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 909 ms, sys: 683 ms, total: 1.59 s\n", - "Wall time: 478 ms\n" + "CPU times: user 844 ms, sys: 530 ms, total: 1.37 s\n", + "Wall time: 485 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f53053d547ee4ef7bd73cc1bb5b56eb6", + "model_id": "3931051a74524ba481b50911b96a8921", "version_major": 2, "version_minor": 0 }, @@ -425,14 +426,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 998 ms, sys: 678 ms, total: 1.68 s\n", - "Wall time: 535 ms\n" + "CPU times: user 850 ms, sys: 532 ms, total: 1.38 s\n", + "Wall time: 507 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c9872a2c9e8140688d9d774033e84150", + "model_id": "0370cc7dd475438da444fee2c48463d7", "version_major": 2, "version_minor": 0 }, @@ -560,14 +561,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 860 ms, sys: 544 ms, total: 1.4 s\n", - "Wall time: 515 ms\n" + "CPU times: user 873 ms, sys: 573 ms, total: 1.45 s\n", + "Wall time: 492 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "97cbe4df04fb4063897033139372d528", + "model_id": "9548cc409e5147a99234750f155de932", "version_major": 2, "version_minor": 0 }, @@ -684,14 +685,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 913 ms, sys: 619 ms, total: 1.53 s\n", - "Wall time: 492 ms\n" + "CPU times: user 829 ms, sys: 527 ms, total: 1.36 s\n", + "Wall time: 485 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "acdf7ff4a7f442f38aa496e888d6a941", + "model_id": "743365d93b084a99ad90d8af6c3a65aa", "version_major": 2, "version_minor": 0 }, @@ -810,14 +811,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 957 ms, sys: 651 ms, total: 1.61 s\n", - "Wall time: 505 ms\n" + "CPU times: user 868 ms, sys: 536 ms, total: 1.4 s\n", + "Wall time: 507 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ae6de8a7e064553aa571d8cae0a51e1", + "model_id": "97a07fa56cfb4067a0d072b999b9d706", "version_major": 2, "version_minor": 0 }, @@ -935,14 +936,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 906 ms, sys: 637 ms, total: 1.54 s\n", - "Wall time: 488 ms\n" + "CPU times: user 826 ms, sys: 480 ms, total: 1.31 s\n", + "Wall time: 498 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "66e2c484e61343678e625b2e918eb417", + "model_id": "d3e367dd536a44e198c099b47dc289f9", "version_major": 2, "version_minor": 0 }, @@ -1070,6 +1071,158 @@ { "cell_type": "code", "execution_count": 10, + "id": "0febd3ed-b460-4cb3-ba5b-f3de3a33a29a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.09 s, sys: 473 ms, total: 2.56 s\n", + "Wall time: 5.45 s\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cab6bdff53ff441894866b56c3a6d179", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metadata_ImageNumberCells_Number_Object_NumberImage_FileName_OrigAGPImage_FileName_OrigDNAImage_FileName_OrigRNAImage_FileName_OrigAGP_OMEArrow_ORIGImage_FileName_OrigAGP_OMEArrow_LABLImage_FileName_OrigAGP_OMEArrow_COMPImage_FileName_OrigDNA_OMEArrow_ORIGImage_FileName_OrigDNA_OMEArrow_LABLImage_FileName_OrigDNA_OMEArrow_COMPImage_FileName_OrigRNA_OMEArrow_ORIGImage_FileName_OrigRNA_OMEArrow_LABLImage_FileName_OrigRNA_OMEArrow_COMP
011r01c01f01p01-ch2sk1fk1fl1.tiffr01c01f01p01-ch5sk1fk1fl1.tiffr01c01f01p01-ch3sk1fk1fl1.tiffNone
112r01c01f01p01-ch2sk1fk1fl1.tiffr01c01f01p01-ch5sk1fk1fl1.tiffr01c01f01p01-ch3sk1fk1fl1.tiffNone
213r01c01f01p01-ch2sk1fk1fl1.tiffr01c01f01p01-ch5sk1fk1fl1.tiffr01c01f01p01-ch3sk1fk1fl1.tiffNone
\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# export to OME Parquet, a format which uses OME Arrow\n", + "# to store OME-spec images as values within the table.\n", + "frame.to_ome_parquet(file_path=\"example.ome.parquet\")\n", + "\n", + "# read OME Parquet file into the CytoDataFrame\n", + "CytoDataFrame(data=\"example.ome.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "id": "0f845d33-b3b1-4ac7-9e09-ede8042d9306", "metadata": {}, "outputs": [ @@ -1077,14 +1230,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 949 ms, sys: 729 ms, total: 1.68 s\n", - "Wall time: 485 ms\n" + "CPU times: user 881 ms, sys: 550 ms, total: 1.43 s\n", + "Wall time: 514 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8d4aedd7a4fc434aa8bc576dad82c80f", + "model_id": "c19a8f9c323842a4a3af799a2b2f7f90", "version_major": 2, "version_minor": 0 }, @@ -1184,7 +1337,7 @@ "data": { "text/plain": [] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1218,7 +1371,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "365f68d0-3b66-4fb8-a381-8712dc43188c", "metadata": {}, "outputs": [ @@ -1226,14 +1379,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 307 ms, sys: 217 ms, total: 524 ms\n", - "Wall time: 177 ms\n" + "CPU times: user 244 ms, sys: 162 ms, total: 406 ms\n", + "Wall time: 148 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a58b7b8cf8f74940b770af2ec3110be1", + "model_id": "c7b58a7f6e8c48868cdfa39cf6263e69", "version_major": 2, "version_minor": 0 }, @@ -1317,7 +1470,7 @@ "data": { "text/plain": [] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1341,7 +1494,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "de89c992-ea92-4565-b03b-3b27ae46d28c", "metadata": {}, "outputs": [ @@ -1349,14 +1502,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 286 ms, sys: 211 ms, total: 497 ms\n", - "Wall time: 155 ms\n" + "CPU times: user 326 ms, sys: 182 ms, total: 508 ms\n", + "Wall time: 239 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9474fab165714ceb979a08b766f603a4", + "model_id": "12489f60233f4875922d660eca4fbc77", "version_major": 2, "version_minor": 0 }, @@ -1440,7 +1593,7 @@ "data": { "text/plain": [] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1448,7 +1601,7 @@ "source": [ "%%time\n", "# view NF1 Cell Painting data with images and overlaid outlines from masks\n", - "CytoDataFrame(\n", + "frame = CytoDataFrame(\n", " data=f\"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet\",\n", " data_context_dir=f\"{nf1_cellpainting_path}/Plate_2_images\",\n", " data_mask_context_dir=f\"{nf1_cellpainting_path}/Plate_2_masks\",\n", @@ -1460,12 +1613,13 @@ " \"Image_FileName_RFP\",\n", " \"Image_FileName_DAPI\",\n", " ]\n", - "][:3]" + "][:3]\n", + "frame" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "49fc2952-eb69-4ed9-bd91-c29843ddbebd", "metadata": {}, "outputs": [ @@ -1473,14 +1627,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 313 ms, sys: 216 ms, total: 529 ms\n", - "Wall time: 176 ms\n" + "CPU times: user 261 ms, sys: 177 ms, total: 437 ms\n", + "Wall time: 149 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8f436180c329426e8524a7463eed328d", + "model_id": "a6b51158acd4499395d2ce1f83c716b7", "version_major": 2, "version_minor": 0 }, @@ -1564,7 +1718,7 @@ "data": { "text/plain": [] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1600,7 +1754,159 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, + "id": "e21df647-9887-49b3-8328-4f861807abce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 958 ms, sys: 182 ms, total: 1.14 s\n", + "Wall time: 1.15 s\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "419763b9c98544dfb6a340470f34bf2d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metadata_ImageNumberMetadata_Cells_Number_Object_NumberImage_FileName_GFPImage_FileName_RFPImage_FileName_DAPIImage_FileName_GFP_OMEArrow_ORIGImage_FileName_GFP_OMEArrow_LABLImage_FileName_GFP_OMEArrow_COMPImage_FileName_RFP_OMEArrow_ORIGImage_FileName_RFP_OMEArrow_LABLImage_FileName_RFP_OMEArrow_COMPImage_FileName_DAPI_OMEArrow_ORIGImage_FileName_DAPI_OMEArrow_LABLImage_FileName_DAPI_OMEArrow_COMP
353314B7_01_2_3_GFP_001.tifB7_01_3_3_RFP_001.tifB7_01_1_3_DAPI_001.tifNone
156411317H12_01_2_1_GFP_001.tifH12_01_3_1_RFP_001.tifH12_01_1_1_DAPI_001.tifNone
1275945F7_01_2_2_GFP_001.tifF7_01_3_2_RFP_001.tifF7_01_1_2_DAPI_001.tifNone
\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# export to OME Parquet, a format which uses OME Arrow\n", + "# to store OME-spec images as values within the table.\n", + "frame.to_ome_parquet(file_path=\"example.ome.parquet\")\n", + "\n", + "# read OME Parquet file into the CytoDataFrame\n", + "CytoDataFrame(data=\"example.ome.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "5a5304db-7dac-4f45-aa55-dd3f50299c60", "metadata": {}, "outputs": [ @@ -1608,14 +1914,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 125 ms, sys: 91.5 ms, total: 216 ms\n", - "Wall time: 73.4 ms\n" + "CPU times: user 92.2 ms, sys: 37.9 ms, total: 130 ms\n", + "Wall time: 66.1 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ae82ec39b1ab4517913a6aaaf04aa1cb", + "model_id": "0e19834874994411ab3dbbb3dff790d5", "version_major": 2, "version_minor": 0 }, @@ -1699,7 +2005,7 @@ "data": { "text/plain": [] }, - "execution_count": 14, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1724,7 +2030,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "4c9af999-c9a2-4408-aa16-9437d08013ae", "metadata": {}, "outputs": [ @@ -1732,14 +2038,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 369 ms, sys: 202 ms, total: 570 ms\n", - "Wall time: 226 ms\n" + "CPU times: user 336 ms, sys: 233 ms, total: 570 ms\n", + "Wall time: 185 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cd820a3a018e4ef19e9c401b0f33f1df", + "model_id": "9a2d765c8ca043d1964aa51c32a0b19f", "version_major": 2, "version_minor": 0 }, @@ -1833,7 +2139,7 @@ "data": { "text/plain": [] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1862,7 +2168,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "e8ebb16d-ee5f-4a34-b599-aef245b57705", "metadata": {}, "outputs": [ @@ -1870,10 +2176,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1 μs, sys: 0 ns, total: 1 μs\n", - "Wall time: 1.91 μs\n" + "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n", + "Wall time: 3.1 μs\n" ] }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "646b26ef8b5644fc8094789430209d0d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -1957,7 +2277,7 @@ "data": { "text/plain": [] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1968,6 +2288,168 @@ "# by quick variable reference.\n", "cdf" ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0892633a-fdd2-448a-a96a-54dad4b5caf8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 895 ms, sys: 236 ms, total: 1.13 s\n", + "Wall time: 1.05 s\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0402208f8d74443b845c657b7fd58954", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metadata_ImageNumberMetadata_Nuclei_Number_Object_NumberImage_FileName_OrigAGPImage_FileName_OrigDNAImage_FileName_OrigAGP_OMEArrow_ORIGImage_FileName_OrigAGP_OMEArrow_LABLImage_FileName_OrigAGP_OMEArrow_COMPImage_FileName_OrigDNA_OMEArrow_ORIGImage_FileName_OrigDNA_OMEArrow_LABLImage_FileName_OrigDNA_OMEArrow_COMP
033r03c03f03p01-ch3sk1fk1fl1.tiffr03c03f03p01-ch5sk1fk1fl1.tiff
134r03c03f03p01-ch3sk1fk1fl1.tiffr03c03f03p01-ch5sk1fk1fl1.tiff
236r03c03f03p01-ch3sk1fk1fl1.tiffr03c03f03p01-ch5sk1fk1fl1.tiff
337r03c03f03p01-ch3sk1fk1fl1.tiffr03c03f03p01-ch5sk1fk1fl1.tiff
438r03c03f03p01-ch3sk1fk1fl1.tiffr03c03f03p01-ch5sk1fk1fl1.tiff
\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# export to OME Parquet, a format which uses OME Arrow\n", + "# to store OME-spec images as values within the table.\n", + "cdf.to_ome_parquet(file_path=\"example.ome.parquet\")\n", + "\n", + "# read OME Parquet file into the CytoDataFrame\n", + "CytoDataFrame(data=\"example.ome.parquet\")" + ] } ], "metadata": { diff --git a/docs/src/examples/cytodataframe_at_a_glance.py b/docs/src/examples/cytodataframe_at_a_glance.py index 09bdcd3..73f4e2d 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.py +++ b/docs/src/examples/cytodataframe_at_a_glance.py @@ -54,7 +54,7 @@ # %%time # view JUMP plate BR00117006 with images and overlaid outlines for segmentation -CytoDataFrame( +frame = CytoDataFrame( data=f"{jump_data_path}/BR00117006_shrunken.parquet", data_context_dir=f"{jump_data_path}/images/orig", data_outline_context_dir=f"{jump_data_path}/images/outlines", @@ -67,6 +67,7 @@ "Image_FileName_OrigRNA", ] ][:3] +frame # %%time @@ -183,6 +184,16 @@ ] ][:5].T +# + +# %%time +# export to OME Parquet, a format which uses OME Arrow +# to store OME-spec images as values within the table. +frame.to_ome_parquet(file_path="example.ome.parquet") + +# read OME Parquet file into the CytoDataFrame +CytoDataFrame(data="example.ome.parquet") +# - + # %%time # view JUMP plate BR00117006 with images, changing the bounding box # using offsets so each image has roughly the same size. @@ -225,7 +236,7 @@ # %%time # view NF1 Cell Painting data with images and overlaid outlines from masks -CytoDataFrame( +frame = CytoDataFrame( data=f"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet", data_context_dir=f"{nf1_cellpainting_path}/Plate_2_images", data_mask_context_dir=f"{nf1_cellpainting_path}/Plate_2_masks", @@ -238,6 +249,7 @@ "Image_FileName_DAPI", ] ][:3] +frame # + # %%time @@ -266,6 +278,15 @@ "Image_FileName_DAPI", ] ][:3] + +# + +# %%time +# export to OME Parquet, a format which uses OME Arrow +# to store OME-spec images as values within the table. +frame.to_ome_parquet(file_path="example.ome.parquet") + +# read OME Parquet file into the CytoDataFrame +CytoDataFrame(data="example.ome.parquet") # - # %%time @@ -308,3 +329,12 @@ # show that we can use the cytodataframe again # by quick variable reference. cdf + +# + +# %%time +# export to OME Parquet, a format which uses OME Arrow +# to store OME-spec images as values within the table. +cdf.to_ome_parquet(file_path="example.ome.parquet") + +# read OME Parquet file into the CytoDataFrame +CytoDataFrame(data="example.ome.parquet") diff --git a/media/coverage-badge.svg b/media/coverage-badge.svg index f81ab84..e21693b 100644 --- a/media/coverage-badge.svg +++ b/media/coverage-badge.svg @@ -1 +1 @@ -coverage: 85.58%coverage85.58% \ No newline at end of file +coverage: 79.06%coverage79.06% \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 5bfe2b9..a460e60 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -44,10 +44,8 @@ files = [ ] [package.dependencies] -exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] @@ -177,9 +175,6 @@ files = [ {file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"}, ] -[package.dependencies] -typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} - [[package]] name = "attrs" version = "23.2.0" @@ -280,8 +275,6 @@ packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" pytokens = ">=0.3.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -634,23 +627,24 @@ test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist" [[package]] name = "cosmicqc" -version = "0.1.5" +version = "1.0.1" description = "coSMicQC is a software tool for single-cell quality control of morphology datasets." optional = false -python-versions = "<3.13,>=3.10" +python-versions = "<3.14,>=3.10" groups = ["dev"] markers = "python_version < \"3.13\"" files = [ - {file = "cosmicqc-0.1.5-py3-none-any.whl", hash = "sha256:18345e226ce02076214df6256b0fe0013d8578722a4271b884d991c8d518ae9e"}, - {file = "cosmicqc-0.1.5.tar.gz", hash = "sha256:2d29bf0955fbcefd54bb2edb02979e1bc4390443d1ce24c854eab5111c769608"}, + {file = "cosmicqc-1.0.1-py3-none-any.whl", hash = "sha256:b4be60de43c40774aa0d42a3749478d4dff6f08e8b57a2764835cb26a9efe06e"}, + {file = "cosmicqc-1.0.1.tar.gz", hash = "sha256:3bee8896f0084de9e12e02aa55fe1ef51076d5a8b0dba6f00ec8225340ae8918"}, ] [package.dependencies] -cytodataframe = ">=0.0.11" +cytodataframe = {version = ">=0.0.25", markers = "python_version >= \"3.9\""} fire = ">=0.6,<0.8" +llvmlite = "0.45.1" matplotlib = ">=3.10.3,<4.0.0" pandas = {version = ">=2.2.2,<3.0.0", markers = "python_version >= \"3.9\""} -pyarrow = ">=16,<21" +pyarrow = ">=16" pyyaml = ">=6.0.1,<7.0.0" scipy = {version = ">=1.13.0,<2.0.0", markers = "python_version >= \"3.9\""} seaborn = ">=0.13.2,<0.14.0" @@ -909,22 +903,6 @@ files = [ [package.dependencies] packaging = ">=20.9" -[[package]] -name = "exceptiongroup" -version = "1.2.1" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -groups = ["main", "dev", "docs"] -markers = "python_version == \"3.10\"" -files = [ - {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, -] - -[package.extras] -test = ["pytest (>=6)"] - [[package]] name = "executing" version = "2.0.1" @@ -1211,18 +1189,21 @@ files = [ {file = "imagecodecs-2025.3.30-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:44dc270d78b7cda29e2d430acbd8dab66322766412e596f450871e2831148aa2"}, {file = "imagecodecs-2025.3.30-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cee56331d9a700e9ec518caeba6d9813ffd7c042f1fae47d2dafcdfc259d2a5"}, {file = "imagecodecs-2025.3.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e354fa2046bb7029d0a1ff15a8bb31487ca0d479cd42fdb5c312bcd9408ce3fc"}, + {file = "imagecodecs-2025.3.30-cp311-cp311-win32.whl", hash = "sha256:4ce5c1eb14716bfa733516a69f3b8b77f05cf0541558cc4e8f8991e57d40cc82"}, {file = "imagecodecs-2025.3.30-cp311-cp311-win_amd64.whl", hash = "sha256:7debc7231780d8e44ffcd13aee2178644d93115c19ff73c96cf3068b219ac3a2"}, {file = "imagecodecs-2025.3.30-cp311-cp311-win_arm64.whl", hash = "sha256:2b5c1c02c70da9561da9b728b97599b3ed0ef7d5399979017ce90029f522587b"}, {file = "imagecodecs-2025.3.30-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:dad3f0fc39eb9a88cecb2ccfe0e13eac35b21da36c0171285e4b289b12085235"}, {file = "imagecodecs-2025.3.30-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2806b6e605e674d7e3d21099779a88cb30b9da4807a88e0f02da3ea249085e5f"}, {file = "imagecodecs-2025.3.30-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abfb2231f4741262c91f3e77af85ce1f35b7d44f71414c5d1ba6008cfc3e5672"}, {file = "imagecodecs-2025.3.30-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6583fdcac9a4cd75a7701ed7fac7e74d3836807eb9f8aee22f60f519b748ff56"}, + {file = "imagecodecs-2025.3.30-cp312-cp312-win32.whl", hash = "sha256:ed187770804cbf322b60e24dfc14b8a1e2c321a1b93afb3a7e4948fbb9e99bf0"}, {file = "imagecodecs-2025.3.30-cp312-cp312-win_amd64.whl", hash = "sha256:0b0f6e0f118674c76982e5a25bfeec5e6fc4fc4fc102c0d356e370f473e7b512"}, {file = "imagecodecs-2025.3.30-cp312-cp312-win_arm64.whl", hash = "sha256:bde3bd80cdf65afddb64af4c433549e882a5aa15d300e3781acab8d4df1c94a9"}, {file = "imagecodecs-2025.3.30-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:0bf7248a7949525848f3e2c7d09e837e8333d52c7ac0436c6eed36235da8227b"}, {file = "imagecodecs-2025.3.30-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3e598b6ec77df2517a8d4af6b66393250ba4a8764fccda5dbe6546236df5d11c"}, {file = "imagecodecs-2025.3.30-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:212ae6ba7c656ddf24e8aabefc56c5e2300335ed1305838508c57de202e6dbe4"}, {file = "imagecodecs-2025.3.30-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfa7b1c7d7af449c8153a040f7782d4296350245f8809e49dd4fb5bef4d740e6"}, + {file = "imagecodecs-2025.3.30-cp313-cp313-win32.whl", hash = "sha256:66b614488d85d91f456b949fde4ad678dbe95cde38861043122237de086308c1"}, {file = "imagecodecs-2025.3.30-cp313-cp313-win_amd64.whl", hash = "sha256:1c51fef75fec66b4ea5e98b4ab47889942049389278749e1f96329c38f31c377"}, {file = "imagecodecs-2025.3.30-cp313-cp313-win_arm64.whl", hash = "sha256:eda70c0b9d2bcf225f7ae12dbefd0e3ab92ea7db30cdb56b292517fb61357ad7"}, {file = "imagecodecs-2025.3.30.tar.gz", hash = "sha256:29256f44a7fcfb8f235a3e9b3bae72b06ea2112e63bcc892267a8c01b7097f90"}, @@ -1362,7 +1343,6 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} decorator = "*" -exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} jedi = ">=0.16" matplotlib-inline = "*" pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} @@ -1740,7 +1720,6 @@ jupyterlab-server = ">=2.27.1,<3" notebook-shim = ">=0.2" packaging = "*" setuptools = ">=41.1.0" -tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""} tornado = ">=6.2.0" traitlets = "*" @@ -1839,7 +1818,6 @@ mdit-py-plugins = "*" nbformat = "*" packaging = "*" pyyaml = "*" -tomli = {version = "*", markers = "python_version < \"3.11\""} [package.extras] dev = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs[fs] (>=1.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-asyncio", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist", "sphinx", "sphinx-gallery (>=0.8)"] @@ -1995,6 +1973,38 @@ dev = ["changelist (==0.5)"] lint = ["pre-commit (==3.7.0)"] test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"] +[[package]] +name = "llvmlite" +version = "0.45.1" +description = "lightweight wrapper around basic LLVM functionality" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +markers = "python_version < \"3.13\"" +files = [ + {file = "llvmlite-0.45.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:1b1af0c910af0978aa55fa4f60bbb3e9f39b41e97c2a6d94d199897be62ba07a"}, + {file = "llvmlite-0.45.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02a164db2d79088bbd6e0d9633b4fe4021d6379d7e4ac7cc85ed5f44b06a30c5"}, + {file = "llvmlite-0.45.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f2d47f34e4029e6df3395de34cc1c66440a8d72712993a6e6168db228686711b"}, + {file = "llvmlite-0.45.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7319e5f9f90720578a7f56fbc805bdfb4bc071b507c7611f170d631c3c0f1e0"}, + {file = "llvmlite-0.45.1-cp310-cp310-win_amd64.whl", hash = "sha256:4edb62e685867799e336723cb9787ec6598d51d0b1ed9af0f38e692aa757e898"}, + {file = "llvmlite-0.45.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:60f92868d5d3af30b4239b50e1717cb4e4e54f6ac1c361a27903b318d0f07f42"}, + {file = "llvmlite-0.45.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98baab513e19beb210f1ef39066288784839a44cd504e24fff5d17f1b3cf0860"}, + {file = "llvmlite-0.45.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3adc2355694d6a6fbcc024d59bb756677e7de506037c878022d7b877e7613a36"}, + {file = "llvmlite-0.45.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2f3377a6db40f563058c9515dedcc8a3e562d8693a106a28f2ddccf2c8fcf6ca"}, + {file = "llvmlite-0.45.1-cp311-cp311-win_amd64.whl", hash = "sha256:f9c272682d91e0d57f2a76c6d9ebdfccc603a01828cdbe3d15273bdca0c3363a"}, + {file = "llvmlite-0.45.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:28e763aba92fe9c72296911e040231d486447c01d4f90027c8e893d89d49b20e"}, + {file = "llvmlite-0.45.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a53f4b74ee9fd30cb3d27d904dadece67a7575198bd80e687ee76474620735f"}, + {file = "llvmlite-0.45.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b3796b1b1e1c14dcae34285d2f4ea488402fbd2c400ccf7137603ca3800864f"}, + {file = "llvmlite-0.45.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:779e2f2ceefef0f4368548685f0b4adde34e5f4b457e90391f570a10b348d433"}, + {file = "llvmlite-0.45.1-cp312-cp312-win_amd64.whl", hash = "sha256:9e6c9949baf25d9aa9cd7cf0f6d011b9ca660dd17f5ba2b23bdbdb77cc86b116"}, + {file = "llvmlite-0.45.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:d9ea9e6f17569a4253515cc01dade70aba536476e3d750b2e18d81d7e670eb15"}, + {file = "llvmlite-0.45.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:c9f3cadee1630ce4ac18ea38adebf2a4f57a89bd2740ce83746876797f6e0bfb"}, + {file = "llvmlite-0.45.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:57c48bf2e1083eedbc9406fb83c4e6483017879714916fe8be8a72a9672c995a"}, + {file = "llvmlite-0.45.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3aa3dfceda4219ae39cf18806c60eeb518c1680ff834b8b311bd784160b9ce40"}, + {file = "llvmlite-0.45.1-cp313-cp313-win_amd64.whl", hash = "sha256:080e6f8d0778a8239cd47686d402cb66eb165e421efa9391366a9b7e5810a38b"}, + {file = "llvmlite-0.45.1.tar.gz", hash = "sha256:09430bb9d0bb58fc45a45a57c7eae912850bedc095cd0810a57de109c69e1c32"}, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -2590,7 +2600,6 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] @@ -2864,7 +2873,6 @@ files = [ [package.dependencies] pastel = ">=0.2.1,<0.3.0" pyyaml = ">=6.0.2,<7.0" -tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""} [package.extras] poetry-plugin = ["poetry (>=1.2.0,<3.0.0) ; python_version < \"4.0\""] @@ -3110,12 +3118,10 @@ files = [ [package.dependencies] colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} iniconfig = ">=1.0.1" packaging = ">=22" pluggy = ">=1.5,<2" pygments = ">=2.7.2" -tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] @@ -3517,7 +3523,6 @@ description = "Manipulate well-formed Roman numerals" optional = false python-versions = ">=3.9" groups = ["docs"] -markers = "python_version >= \"3.11\"" files = [ {file = "roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c"}, {file = "roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d"}, @@ -3690,71 +3695,6 @@ docs = ["PyWavelets (>=1.6)", "dask[array] (>=2023.2.0)", "intersphinx-registry optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=1.1.1)", "dask[array] (>=2023.2.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"] test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=8)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"] -[[package]] -name = "scipy" -version = "1.15.3" -description = "Fundamental algorithms for scientific computing in Python" -optional = false -python-versions = ">=3.10" -groups = ["main", "dev"] -markers = "python_version == \"3.10\"" -files = [ - {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, - {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, - {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f"}, - {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92"}, - {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82"}, - {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40"}, - {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e"}, - {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c"}, - {file = "scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13"}, - {file = "scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b"}, - {file = "scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba"}, - {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65"}, - {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1"}, - {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889"}, - {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982"}, - {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9"}, - {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594"}, - {file = "scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb"}, - {file = "scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019"}, - {file = "scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6"}, - {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477"}, - {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c"}, - {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45"}, - {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49"}, - {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e"}, - {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539"}, - {file = "scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed"}, - {file = "scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759"}, - {file = "scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62"}, - {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb"}, - {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730"}, - {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825"}, - {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7"}, - {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11"}, - {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126"}, - {file = "scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163"}, - {file = "scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8"}, - {file = "scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5"}, - {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e"}, - {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb"}, - {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723"}, - {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb"}, - {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4"}, - {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5"}, - {file = "scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca"}, - {file = "scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf"}, -] - -[package.dependencies] -numpy = ">=1.23.5,<2.5" - -[package.extras] -dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] -doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] -test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] - [[package]] name = "scipy" version = "1.16.0" @@ -3801,7 +3741,7 @@ files = [ {file = "scipy-1.16.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f56296fefca67ba605fd74d12f7bd23636267731a72cb3947963e76b8c0a25db"}, {file = "scipy-1.16.0.tar.gz", hash = "sha256:b5ef54021e832869c8cfb03bc3bf20366cbcd426e02a58e8a58d7584dfbb8f62"}, ] -markers = {main = "python_version >= \"3.11\"", dev = "python_version < \"3.13\" and python_version >= \"3.11\""} +markers = {dev = "python_version < \"3.13\""} [package.dependencies] numpy = ">=1.25.2,<2.6" @@ -3920,43 +3860,6 @@ files = [ {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, ] -[[package]] -name = "sphinx" -version = "8.1.3" -description = "Python documentation generator" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -markers = "python_version == \"3.10\"" -files = [ - {file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"}, - {file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"}, -] - -[package.dependencies] -alabaster = ">=0.7.14" -babel = ">=2.13" -colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""} -docutils = ">=0.20,<0.22" -imagesize = ">=1.3" -Jinja2 = ">=3.1" -packaging = ">=23.0" -Pygments = ">=2.17" -requests = ">=2.30.0" -snowballstemmer = ">=2.2" -sphinxcontrib-applehelp = ">=1.0.7" -sphinxcontrib-devhelp = ">=1.0.6" -sphinxcontrib-htmlhelp = ">=2.0.6" -sphinxcontrib-jsmath = ">=1.0.1" -sphinxcontrib-qthelp = ">=1.0.6" -sphinxcontrib-serializinghtml = ">=1.1.9" -tomli = {version = ">=2", markers = "python_version < \"3.11\""} - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"] -test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] - [[package]] name = "sphinx" version = "8.2.3" @@ -3964,7 +3867,6 @@ description = "Python documentation generator" optional = false python-versions = ">=3.11" groups = ["docs"] -markers = "python_version >= \"3.11\"" files = [ {file = "sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3"}, {file = "sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348"}, @@ -4319,19 +4221,6 @@ webencodings = ">=0.4" doc = ["sphinx", "sphinx_rtd_theme"] test = ["pytest", "ruff"] -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.7" -groups = ["dev", "docs"] -markers = "python_version == \"3.10\"" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] - [[package]] name = "tornado" version = "6.5.1" @@ -4393,7 +4282,7 @@ files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] -markers = {main = "python_version < \"3.12\""} +markers = {main = "python_version == \"3.11\""} [[package]] name = "tzdata" @@ -4527,5 +4416,5 @@ test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.funct [metadata] lock-version = "2.1" -python-versions = ">=3.10,<3.14" -content-hash = "e9942c2c0e0ae6e1750f042416105fb7d2cd2287a930aef2b98f36542bad99b5" +python-versions = ">=3.11,<3.14" +content-hash = "36ea2d832a003c383766c71a850c800ee70edca5f1b72acc3cc2d7546c35e0ec" diff --git a/pyproject.toml b/pyproject.toml index 33b3376..5daa0c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,14 +15,14 @@ packages = [ { include = "cytodataframe", from = "src" } ] poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = [ "plugin" ] } [tool.poetry.dependencies] -python = ">=3.10,<3.14" +python = ">=3.11,<3.14" # used for data management pandas = [ { version = "<2.2.2", python = "<3.9" }, { version = "^2.2.2", python = ">=3.9" }, ] # used for data ingest and export -pyarrow = ">=16,<21" +pyarrow = ">=16" # used for environment detection ipython = "^8.12.3" # used for image processing @@ -56,7 +56,7 @@ isort = ">=5.13.2,<8.0.0" jupyterlab-code-formatter = "^3.0.2" duckdb = "^1.1.3" # version specifications below used to help surpass 3.13-based installations -cosmicqc = { version = "^0.1.4", markers = "python_version < '3.13' and python_version >= '3.10'" } +cosmicqc = { version = ">=1.0.1", markers = "python_version < '3.13' and python_version >= '3.10'" } poethepoet = "^0.37.0" [tool.poetry.group.docs.dependencies] @@ -116,6 +116,7 @@ lint.select = [ ] # Ignore `E402` and `F401` (unused imports) in all `__init__.py` files lint.per-file-ignores."__init__.py" = [ "E402", "F401" ] +lint.per-file-ignores."src/cytodataframe/*.py" = [ "ANN401", "PLC0415" ] lint.per-file-ignores."src/cytodataframe/image.py" = [ "PLR2004" ] # ignore typing rules for tests lint.per-file-ignores."tests/*" = [ "ANN201", "PLR0913", "PLR2004", "SIM105" ] diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 1581a95..246e888 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -7,6 +7,8 @@ import pathlib import re import sys +import tempfile +import uuid import warnings from io import BytesIO, StringIO from typing import ( @@ -285,7 +287,7 @@ def __init__( # noqa: PLR0913 # instead of Pandas DataFrames. self._wrap_methods() - def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # noqa: ANN401 + def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: """ Returns an element or a slice of the underlying pandas DataFrame. @@ -330,7 +332,7 @@ def _return_cytodataframe( method_name: str, *args: Tuple[Any, ...], **kwargs: Dict[str, Any], - ) -> Any: # noqa: ANN401 + ) -> Any: """ Wraps a given method to ensure that the returned result is an CytoDataFrame if applicable. @@ -400,7 +402,7 @@ def _wrap_method(self: CytoDataFrame_type, method_name: str) -> Callable: the result is a CytoDataFrame. """ - def wrapper(*args: Tuple[Any, ...], **kwargs: Dict[str, Any]) -> Any: # noqa: ANN401 + def wrapper(*args: Tuple[Any, ...], **kwargs: Dict[str, Any]) -> Any: """ Wraps the specified method to ensure it returns a CytoDataFrame. @@ -639,6 +641,283 @@ def export( else: raise ValueError("Unsupported file format for export.") + def to_ome_parquet( # noqa: PLR0915, PLR0912, C901 + self: CytoDataFrame_type, + file_path: Union[str, pathlib.Path], + arrow_column_suffix: str = "_OMEArrow", + include_original: bool = True, + include_mask_outline: bool = True, + include_composite: bool = True, + **kwargs: Dict[str, Any], + ) -> None: + """Export the dataframe with cropped images encoded as OMEArrow structs.""" + + try: + from ome_arrow import OMEArrow # type: ignore + except ImportError as exc: + raise ImportError( + "CytoDataFrame.to_ome_parquet requires the optional 'ome-arrow' " + "dependency. Install it via `pip install ome-arrow`." + ) from exc + + try: + import importlib.metadata as importlib_metadata + except ImportError: # pragma: no cover + import importlib_metadata # type: ignore + + try: + ome_arrow_version = importlib_metadata.version("ome-arrow") + except importlib_metadata.PackageNotFoundError: + module = sys.modules.get("ome_arrow") + ome_arrow_version = getattr(module, "__version__", None) + + if not any((include_original, include_mask_outline, include_composite)): + raise ValueError( + "At least one of include_original, include_mask_outline, or " + "include_composite must be True." + ) + + image_cols = self.find_image_columns() or [] + if not image_cols: + logger.debug( + "No image filename columns detected. Falling back to to_parquet()." + ) + self.to_parquet(file_path, **kwargs) + return + + bounding_box_df = self._custom_attrs.get("data_bounding_box") + if bounding_box_df is None: + raise ValueError( + "to_ome_parquet requires bounding box metadata to crop images." + ) + + bounding_box_cols = bounding_box_df.columns.tolist() + bbox_column_map = { + "x_min": next( + (col for col in bounding_box_cols if "Minimum_X" in str(col)), None + ), + "y_min": next( + (col for col in bounding_box_cols if "Minimum_Y" in str(col)), None + ), + "x_max": next( + (col for col in bounding_box_cols if "Maximum_X" in str(col)), None + ), + "y_max": next( + (col for col in bounding_box_cols if "Maximum_Y" in str(col)), None + ), + } + + if any(value is None for value in bbox_column_map.values()): + raise ValueError( + "Unable to identify all bounding box coordinate columns for export." + ) + + working_df = self.copy() + + missing_bbox_cols = [ + col for col in bounding_box_cols if col not in working_df.columns + ] + if missing_bbox_cols: + working_df = working_df.join(bounding_box_df[missing_bbox_cols]) + + comp_center_df = self._custom_attrs.get("compartment_center_xy") + comp_center_cols: List[str] = [] + missing_comp_cols: List[str] = [] + if comp_center_df is not None: + comp_center_cols = comp_center_df.columns.tolist() + missing_comp_cols = [ + col for col in comp_center_cols if col not in working_df.columns + ] + if missing_comp_cols: + working_df = working_df.join(comp_center_df[missing_comp_cols]) + + image_path_df = self._custom_attrs.get("data_image_paths") + missing_path_cols: List[str] = [] + if image_path_df is not None: + image_path_cols_all = image_path_df.columns.tolist() + missing_path_cols = [ + col for col in image_path_cols_all if col not in working_df.columns + ] + if missing_path_cols: + working_df = working_df.join(image_path_df[missing_path_cols]) + + all_cols_str, all_cols_back = self._normalize_labels(working_df.columns) + image_cols_str = [str(col) for col in image_cols] + image_path_cols_str = self.find_image_path_columns( + image_cols=image_cols_str, all_cols=all_cols_str + ) + image_path_cols = {} + for image_col in image_cols: + key = str(image_col) + if key in image_path_cols_str: + mapped_col = image_path_cols_str[key] + image_path_cols[image_col] = all_cols_back.get( + str(mapped_col), mapped_col + ) + + comp_center_x = next((col for col in comp_center_cols if "X" in str(col)), None) + comp_center_y = next((col for col in comp_center_cols if "Y" in str(col)), None) + + kwargs.setdefault("engine", "pyarrow") + + from cytodataframe import __version__ as cytodataframe_version + + metadata = { + "cytodataframe:data-producer": "https://github.com/cytomining/CytoDataFrame", + "cytodataframe:data-producer-version": cytodataframe_version, + } + if ome_arrow_version is not None: + metadata["cytodataframe:ome-arrow-version"] = ome_arrow_version + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = pathlib.Path(tmpdir) + for image_col in image_cols: + image_path_col = image_path_cols.get(image_col) + + layer_configs: List[Tuple[str, str]] = [] + if include_original: + layer_configs.append( + ("original", f"{image_col}{arrow_column_suffix}_ORIG") + ) + if include_mask_outline: + layer_configs.append( + ("mask", f"{image_col}{arrow_column_suffix}_LABL") + ) + if include_composite: + layer_configs.append( + ("composite", f"{image_col}{arrow_column_suffix}_COMP") + ) + + column_values = {col_name: [] for _, col_name in layer_configs} + + for _, row in working_df.iterrows(): + image_value = row.get(image_col) + if image_value is None or pd.isna(image_value): + for _, col_name in layer_configs: + column_values[col_name].append(None) + continue + + try: + bbox_values = ( + row[bbox_column_map["x_min"]], + row[bbox_column_map["y_min"]], + row[bbox_column_map["x_max"]], + row[bbox_column_map["y_max"]], + ) + except KeyError: + for _, col_name in layer_configs: + column_values[col_name].append(None) + continue + + if any(pd.isna(value) for value in bbox_values): + for _, col_name in layer_configs: + column_values[col_name].append(None) + continue + + bounding_box = tuple(int(value) for value in bbox_values) + + compartment_center = None + if comp_center_x and comp_center_y: + center_vals = (row.get(comp_center_x), row.get(comp_center_y)) + if not any(val is None or pd.isna(val) for val in center_vals): + compartment_center = tuple(int(v) for v in center_vals) + + image_path_value = ( + row.get(image_path_col) if image_path_col is not None else None + ) + + layers = self._prepare_cropped_image_layers( + data_value=image_value, + bounding_box=bounding_box, + compartment_center_xy=compartment_center, + image_path=image_path_value, + include_original=include_original, + include_mask_outline=include_mask_outline, + include_composite=include_composite, + ) + + sanitized_col = re.sub(r"[^A-Za-z0-9_.-]", "_", str(image_col)) + + for layer_key, col_name in layer_configs: + layer_array = layers.get(layer_key) + if layer_array is None: + column_values[col_name].append(None) + continue + + temp_path = ( + tmpdir_path + / f"{sanitized_col}_{layer_key}_{uuid.uuid4().hex}.tiff" + ) + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + imageio.imwrite(temp_path, layer_array, format="tiff") + except Exception as exc: + logger.error( + "Failed to write temporary TIFF for OMEArrow (%s): %s", + layer_key, + exc, + ) + column_values[col_name].append(None) + continue + try: + ome_struct = OMEArrow(data=str(temp_path)).data + if hasattr(ome_struct, "as_py"): + ome_struct = ome_struct.as_py() + except Exception as exc: + logger.error( + "Failed to create OMEArrow struct for %s: %s", + layer_key, + exc, + ) + column_values[col_name].append(None) + continue + column_values[col_name].append(ome_struct) + + for _, col_name in layer_configs: + working_df[col_name] = column_values[col_name] + + if missing_bbox_cols: + working_df = working_df.drop(columns=missing_bbox_cols) + + if missing_comp_cols: + working_df = working_df.drop(columns=missing_comp_cols) + + if missing_path_cols: + working_df = working_df.drop(columns=missing_path_cols) + + final_kwargs = kwargs.copy() + engine = final_kwargs.pop("engine", None) + existing_metadata = final_kwargs.pop("metadata", {}) or {} + merged_metadata = {**metadata, **existing_metadata} + + index_arg = final_kwargs.pop("index", None) + if merged_metadata: + import pyarrow as pa + import pyarrow.parquet as pq + + table = pa.Table.from_pandas( + working_df, + preserve_index=True if index_arg is None else index_arg, + ) + existing = table.schema.metadata or {} + new_metadata = { + **existing, + **{ + str(k).encode(): str(v).encode() + for k, v in merged_metadata.items() + if v is not None + }, + } + table = table.replace_schema_metadata(new_metadata) + pq.write_table(table, file_path, **final_kwargs) + else: + if index_arg is not None: + final_kwargs["index"] = index_arg + if engine is not None: + final_kwargs["engine"] = engine + working_df.to_parquet(file_path, **final_kwargs) + @staticmethod def is_notebook_or_lab() -> bool: """ @@ -703,6 +982,33 @@ def find_image_columns(self: CytoDataFrame_type) -> List[str]: return image_cols + @staticmethod + def _is_ome_arrow_value(value: Any) -> bool: + """Check whether a value looks like an OME-Arrow struct.""" + + return ( + isinstance(value, dict) + and value.get("type") == "ome.arrow" + and value.get("planes") is not None + and value.get("pixels_meta") is not None + ) + + def find_ome_arrow_columns( + self: CytoDataFrame_type, data: pd.DataFrame + ) -> List[str]: + """Identify columns that contain OME-Arrow structs.""" + + ome_cols: List[str] = [] + for column in data.columns: + series = data[column] + if series.apply(self._is_ome_arrow_value).any(): + ome_cols.append(column) + + if ome_cols: + logger.debug("Found OME-Arrow columns: %s", ome_cols) + + return ome_cols + def get_image_paths_from_data( self: CytoDataFrame_type, image_cols: List[str] ) -> Dict[str, str]: @@ -771,7 +1077,7 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901 candidate_path: pathlib.Path, orig_image: np.ndarray, mask: bool = True, - ) -> np.ndarray: + ) -> Tuple[Optional[np.ndarray], Optional[pathlib.Path]]: """ Search for a mask or outline image file based on the provided patterns and apply it to the target image. @@ -805,7 +1111,7 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901 if file_dir is None: logger.debug("No mask or outline directory specified.") - return None + return None, None if pattern_map is None: matching_mask_file = list( @@ -823,18 +1129,24 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901 outline_color = display_options.get("outline_color", (0, 255, 0)) if mask: - return draw_outline_on_image_from_mask( - orig_image=orig_image, - mask_image_path=matching_mask_file[0], - outline_color=outline_color, + return ( + draw_outline_on_image_from_mask( + orig_image=orig_image, + mask_image_path=matching_mask_file[0], + outline_color=outline_color, + ), + matching_mask_file[0], ) else: - return draw_outline_on_image_from_outline( - orig_image=orig_image, - outline_image_path=matching_mask_file[0], - outline_color=outline_color, + return ( + draw_outline_on_image_from_outline( + orig_image=orig_image, + outline_image_path=matching_mask_file[0], + outline_color=outline_color, + ), + matching_mask_file[0], ) - return None + return None, None for file_pattern, original_pattern in pattern_map.items(): if re.search(original_pattern, data_value): @@ -856,55 +1168,114 @@ def search_for_mask_or_outline( # noqa: PLR0913, PLR0911, C901 # gather the outline color if specified outline_color = display_options.get("outline_color", (0, 255, 0)) if mask: - return draw_outline_on_image_from_mask( - orig_image=orig_image, - mask_image_path=matching_files[0], - outline_color=outline_color, + return ( + draw_outline_on_image_from_mask( + orig_image=orig_image, + mask_image_path=matching_files[0], + outline_color=outline_color, + ), + matching_files[0], ) else: - return draw_outline_on_image_from_outline( - orig_image=orig_image, - outline_image_path=matching_files[0], - outline_color=outline_color, + return ( + draw_outline_on_image_from_outline( + orig_image=orig_image, + outline_image_path=matching_files[0], + outline_color=outline_color, + ), + matching_files[0], ) logger.debug("No mask or outline found for: %s", data_value) - return None + return None, None - def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 + def _extract_array_from_ome_arrow( # noqa: PLR0911 self: CytoDataFrame_type, - data_value: Any, # noqa: ANN401 + data_value: Any, + ) -> Optional[np.ndarray]: + """Convert an OME-Arrow struct (dict) into an ndarray.""" + + if not self._is_ome_arrow_value(data_value): + return None + + try: + pixels_meta = data_value.get("pixels_meta", {}) + size_x = int(pixels_meta.get("size_x")) + size_y = int(pixels_meta.get("size_y")) + planes = data_value.get("planes") + + if size_x <= 0 or size_y <= 0 or planes is None: + return None + + if isinstance(planes, np.ndarray): + plane_entries = planes.tolist() + else: + plane_entries = list(planes) + + if not plane_entries: + return None + + plane = plane_entries[0] + pixels = plane.get("pixels") + if pixels is None: + return None + + np_pixels = np.asarray(pixels) + base = size_x * size_y + if base <= 0 or np_pixels.size == 0 or np_pixels.size % base != 0: + return None + + channel_count = np_pixels.size // base + if channel_count == 1: + array = np_pixels.reshape((size_y, size_x)) + else: + array = np_pixels.reshape((size_y, size_x, channel_count)) + + return self._ensure_uint8(array) + except Exception as exc: + logger.debug("Unable to decode OME-Arrow struct: %s", exc) + return None + + @staticmethod + def _ensure_uint8(array: np.ndarray) -> np.ndarray: + """Convert the provided array to uint8 without unnecessary warnings.""" + + arr = np.asarray(array) + if np.issubdtype(arr.dtype, np.integer): + min_val = arr.min(initial=0) + max_val = arr.max(initial=0) + if 0 <= min_val <= 255 and 0 <= max_val <= 255: # noqa: PLR2004 + return arr.astype(np.uint8, copy=False) + return img_as_ubyte(arr) + + @staticmethod + def _ensure_uint8(array: np.ndarray) -> np.ndarray: + """Convert the provided array to uint8 without unnecessary warnings.""" + + arr = np.asarray(array) + if np.issubdtype(arr.dtype, np.integer): + min_val = arr.min(initial=0) + max_val = arr.max(initial=0) + if min_val >= 0 and max_val <= 255: # noqa: PLR2004 + return arr.astype(np.uint8, copy=False) + return img_as_ubyte(arr) + + def _prepare_cropped_image_layers( # noqa: C901, PLR0915, PLR0912, PLR0913 + self: CytoDataFrame_type, + data_value: Any, bounding_box: Tuple[int, int, int, int], compartment_center_xy: Optional[Tuple[int, int]] = None, image_path: Optional[str] = None, - ) -> str: - """ - Process the image data based on the provided data value - and bounding box, applying masks or outlines where - applicable, and return an HTML representation of the - cropped image for display. - - Args: - data_value (Any): - The value to search for in the file system or as the image data. - bounding_box (Tuple[int, int, int, int]): - The bounding box to crop the image. - compartment_center_xy (Optional[Tuple[int, int]]): - The center coordinates of the compartment. - image_path (Optional[str]): - The path to the image file. - - Returns: - str: - The HTML image display string, or the unmodified data - value if the image cannot be processed. - """ + include_original: bool = False, + include_mask_outline: bool = False, + include_composite: bool = True, + ) -> Dict[str, Optional[np.ndarray]]: + """Return requested cropped image layers for downstream consumers.""" logger.debug( ( - "Processing image data as HTML for display." - " Data value: %s , Bounding box: %s , " + "Preparing cropped layers. Data value: %s, Bounding box: %s, " "Compartment center xy: %s, Image path: %s" ), data_value, @@ -913,55 +1284,58 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 image_path, ) - # stringify the data value in case it isn't a string - data_value = str(data_value) + layers: Dict[str, Optional[np.ndarray]] = {} + + if array := self._extract_array_from_ome_arrow(data_value): + if include_original: + layers["original"] = array + if include_mask_outline: + layers["mask"] = array + if include_composite: + layers["composite"] = array + return layers + data_value = str(data_value) candidate_path = None - # Get the pattern map for segmentation file regex - pattern_map = self._custom_attrs.get("segmentation_file_regex") - # Step 1: Find the candidate file if the data value is not already a file - if not pathlib.Path(data_value).is_file(): - # determine if we have a file from the path (dir) + filename - if ( - self._custom_attrs["data_context_dir"] is None - and image_path is not None - and ( - existing_image_from_path := pathlib.Path( - f"{image_path}/{data_value}" - ) - ).is_file() - ): - logger.debug( - "Found existing image from path: %s", existing_image_from_path - ) - candidate_path = existing_image_from_path + if image_path is not None and pd.isna(image_path): + image_path = None - # Search for the data value in the data context directory - elif self._custom_attrs["data_context_dir"] is not None and ( - candidate_paths := list( - pathlib.Path(self._custom_attrs["data_context_dir"]).rglob( - data_value - ) - ) - ): - logger.debug( - "Found candidate paths (and attempting to use the first): %s", - candidate_paths, - ) - # If a candidate file is found, use the first one - candidate_path = candidate_paths[0] + pattern_map = self._custom_attrs.get("segmentation_file_regex") - else: - logger.debug("No candidate file found for: %s", data_value) - # If no candidate file is found, return the original data value - return data_value + provided_path = pathlib.Path(data_value) + if provided_path.is_file(): + candidate_path = provided_path + elif ( + self._custom_attrs["data_context_dir"] is None + and image_path is not None + and ( + existing_image_from_path := pathlib.Path(image_path) + / pathlib.Path(data_value) + ).is_file() + ): + logger.debug("Found existing image from path: %s", existing_image_from_path) + candidate_path = existing_image_from_path + elif self._custom_attrs["data_context_dir"] is not None and ( + candidate_paths := list( + pathlib.Path(self._custom_attrs["data_context_dir"]).rglob(data_value) + ) + ): + logger.debug( + "Found candidate paths (and attempting to use the first): %s", + candidate_paths, + ) + candidate_path = candidate_paths[0] + else: + logger.debug("No candidate file found for: %s", data_value) + return layers - # read the image as an array - orig_image_array = imageio.imread(candidate_path) + try: + orig_image_array = imageio.imread(candidate_path) + except (FileNotFoundError, ValueError) as exc: + logger.error(exc) + return layers - # Adjust the image with image adjustment callable - # or adaptive histogram equalization if self._custom_attrs["image_adjustment"] is not None: logger.debug("Adjusting image with custom image adjustment function.") orig_image_array = self._custom_attrs["image_adjustment"]( @@ -974,12 +1348,11 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 brightness=self._custom_attrs["_widget_state"]["scale"], ) - # Normalize to 0-255 for image saving - orig_image_array = img_as_ubyte(orig_image_array) + orig_image_array = self._ensure_uint8(orig_image_array) + + original_image_copy = orig_image_array.copy() if include_original else None - prepared_image = None - # Step 2: Search for a mask - prepared_image = self.search_for_mask_or_outline( + prepared_image, mask_source_path = self.search_for_mask_or_outline( data_value=data_value, pattern_map=pattern_map, file_dir=self._custom_attrs["data_mask_context_dir"], @@ -988,10 +1361,8 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 mask=True, ) - # If no mask is found, proceed to search for an outline if prepared_image is None: - # Step 3: Search for an outline if no mask was found - prepared_image = self.search_for_mask_or_outline( + prepared_image, mask_source_path = self.search_for_mask_or_outline( data_value=data_value, pattern_map=pattern_map, file_dir=self._custom_attrs["data_outline_context_dir"], @@ -1000,11 +1371,27 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 mask=False, ) - # Step 4: If neither mask nor outline is found, use the original image array if prepared_image is None: prepared_image = orig_image_array - # Step 5: Add a red dot for the compartment center before cropping + mask_source_array = None + if include_mask_outline and mask_source_path is not None: + try: + loaded_mask = imageio.imread(mask_source_path) + if loaded_mask.ndim == 3: # noqa: PLR2004 + mask_gray = np.max(loaded_mask[..., :3], axis=2) + else: + mask_gray = loaded_mask + mask_binary = mask_gray > 0 + mask_uint8 = np.zeros(mask_binary.shape, dtype=np.uint8) + mask_uint8[mask_binary] = 255 + mask_source_array = mask_uint8 + except (FileNotFoundError, ValueError) as exc: + logger.error( + "Unable to read mask/outline image %s: %s", mask_source_path, exc + ) + mask_source_array = None + if ( compartment_center_xy is not None and self._custom_attrs.get("display_options", None) is None @@ -1012,10 +1399,8 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 self._custom_attrs.get("display_options", None) is not None and self._custom_attrs["display_options"].get("center_dot", True) ): - center_x, center_y = map(int, compartment_center_xy) # Ensure integers + center_x, center_y = map(int, compartment_center_xy) - # Convert grayscale image to RGB if necessary - # Check if the image is grayscale if len(prepared_image.shape) == 2: # noqa: PLR2004 prepared_image = skimage.color.gray2rgb(prepared_image) @@ -1023,70 +1408,63 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 0 <= center_y < prepared_image.shape[0] and 0 <= center_x < prepared_image.shape[1] ): - # Calculate the radius as a fraction of the bounding box size x_min, y_min, x_max, y_max = map(int, bounding_box) box_width = x_max - x_min box_height = y_max - y_min - radius = max( - 1, int(min(box_width, box_height) * 0.03) - ) # 3% of the smaller dimension + radius = max(1, int(min(box_width, box_height) * 0.03)) rr, cc = skimage.draw.disk( (center_y, center_x), radius=radius, shape=prepared_image.shape[:2] ) - prepared_image[rr, cc] = [255, 0, 0] # Red color in RGB + prepared_image[rr, cc] = [255, 0, 0] - # Step 6: Crop the image based on the bounding box and encode it to PNG format try: - # set a default bounding box x_min, y_min, x_max, y_max = map(int, bounding_box) - # if we have custom offset bounding box information, use it if self._custom_attrs.get("display_options", None) and self._custom_attrs[ "display_options" ].get("offset_bounding_box", None): - try: - # note: this will default to the nuclei centers based - # on earlier input for this parameter. - center_x, center_y = map(int, compartment_center_xy) + center_x, center_y = map(int, compartment_center_xy) - offset_bounding_box = self._custom_attrs["display_options"].get( - "offset_bounding_box" - ) - # generate offset bounding box positions - x_min, y_min, x_max, y_max = get_pixel_bbox_from_offsets( - center_x=center_x, - center_y=center_y, - rel_bbox=( - offset_bounding_box["x_min"], - offset_bounding_box["y_min"], - offset_bounding_box["x_max"], - offset_bounding_box["y_max"], - ), - ) - except IndexError: + offset_bounding_box = self._custom_attrs["display_options"].get( + "offset_bounding_box" + ) + x_min, y_min, x_max, y_max = get_pixel_bbox_from_offsets( + center_x=center_x, + center_y=center_y, + rel_bbox=( + offset_bounding_box["x_min"], + offset_bounding_box["y_min"], + offset_bounding_box["x_max"], + offset_bounding_box["y_max"], + ), + ) + + cropped_img_array = prepared_image[y_min:y_max, x_min:x_max] + + cropped_original = ( + original_image_copy[y_min:y_max, x_min:x_max] + if include_original and original_image_copy is not None + else None + ) + if include_mask_outline and mask_source_array is not None: + try: + cropped_mask = mask_source_array[y_min:y_max, x_min:x_max] + except Exception as exc: logger.debug( - ( - "Bounding box %s is out of bounds for image %s ." - " Defaulting to use bounding box from data." - ), - (x_min, y_min, x_max, y_max), - image_path, + "Failed to crop mask/outline array for %s: %s", + mask_source_path, + exc, ) + cropped_mask = None + else: + cropped_mask = None - cropped_img_array = prepared_image[ - y_min:y_max, x_min:x_max - ] # Perform slicing - - # Optionally add a scale bar to the cropped image try: display_options = self._custom_attrs.get("display_options", {}) or {} scale_cfg = display_options.get("scale_bar", None) - # Accept either a boolean (True -> use defaults) or a dict of options. if scale_cfg: - # microns-per-pixel can live in scale_cfg or in - # display_options for convenience um_per_pixel = None if isinstance(scale_cfg, dict): um_per_pixel = scale_cfg.get("um_per_pixel") or scale_cfg.get( @@ -1097,7 +1475,6 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 "um_per_pixel" ) or display_options.get("pixel_size_um") - # NEW: simple fallback for pixels_per_um / pixel_per_um (reciprocal) if um_per_pixel is None: ppu = None if isinstance(scale_cfg, dict): @@ -1114,10 +1491,9 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 if ppu > 0: um_per_pixel = 1.0 / ppu except (TypeError, ValueError): - pass # ignore bad input and skip adding a scale bar + pass if um_per_pixel: - # Default knobs (you can expose more) params = { "length_um": 10.0, "thickness_px": 4, @@ -1157,46 +1533,60 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 ) }, ) - except Exception as e: - logger.debug("Skipping scale bar due to error: %s", e) + except Exception as exc: + logger.debug("Skipping scale bar due to error: %s", exc) - except ValueError as e: + except ValueError as exc: raise ValueError( f"Bounding box contains invalid values: {bounding_box}" - ) from e - except IndexError as e: + ) from exc + except IndexError as exc: raise IndexError( f"Bounding box {bounding_box} is out of bounds for image dimensions " f"{prepared_image.shape}" - ) from e + ) from exc logger.debug("Cropped image array shape: %s", cropped_img_array.shape) - # Step 7: + if include_composite: + layers["composite"] = cropped_img_array + if include_original: + layers["original"] = cropped_original + if include_mask_outline: + layers["mask"] = cropped_mask + + return layers + + def _prepare_cropped_image_array( + self: CytoDataFrame_type, + data_value: Any, + bounding_box: Tuple[int, int, int, int], + compartment_center_xy: Optional[Tuple[int, int]] = None, + image_path: Optional[str] = None, + ) -> Optional[np.ndarray]: + layers = self._prepare_cropped_image_layers( + data_value=data_value, + bounding_box=bounding_box, + compartment_center_xy=compartment_center_xy, + image_path=image_path, + include_composite=True, + ) + return layers.get("composite") + + def _image_array_to_html(self: CytoDataFrame_type, image_array: np.ndarray) -> str: + """Encode an image array as an HTML tag.""" + try: - # Save cropped image to buffer png_bytes_io = BytesIO() - - # catch warnings about low contrast images and avoid displaying them with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - imageio.imwrite(png_bytes_io, cropped_img_array, format="png") + imageio.imwrite(png_bytes_io, image_array, format="png") png_bytes = png_bytes_io.getvalue() - except (FileNotFoundError, ValueError) as exc: - # Handle errors if image processing fails logger.error(exc) - return data_value + raise - logger.debug("Image processed successfully and being sent to HTML for display.") - - # Step 8: Return HTML image display as a base64-encoded PNG - # we dynamically style the image so that it will be displayed based - # on automatic or user-based settings from the display_options custom - # attribute. - display_options = self._custom_attrs.get("display_options", {}) - if display_options is None: - display_options = {} + display_options = self._custom_attrs.get("display_options", {}) or {} width = display_options.get("width", "300px") height = display_options.get("height") @@ -1212,6 +1602,80 @@ def process_image_data_as_html_display( # noqa: PLR0912, C901, PLR0915 f'{base64_image_bytes}" style="{html_style_joined}"/>' ) + def process_ome_arrow_data_as_html_display( + self: CytoDataFrame_type, + data_value: Any, + ) -> str: + """Render an OME-Arrow struct as an HTML element.""" + + array = self._extract_array_from_ome_arrow(data_value) + if array is None: + return data_value + + try: + return self._image_array_to_html(array) + except Exception: + return data_value + + def process_image_data_as_html_display( + self: CytoDataFrame_type, + data_value: Any, + bounding_box: Tuple[int, int, int, int], + compartment_center_xy: Optional[Tuple[int, int]] = None, + image_path: Optional[str] = None, + ) -> str: + """ + Process the image data based on the provided data value + and bounding box, applying masks or outlines where + applicable, and return an HTML representation of the + cropped image for display. + + Args: + data_value (Any): + The value to search for in the file system or as the image data. + bounding_box (Tuple[int, int, int, int]): + The bounding box to crop the image. + compartment_center_xy (Optional[Tuple[int, int]]): + The center coordinates of the compartment. + image_path (Optional[str]): + The path to the image file. + + Returns: + str: + The HTML image display string, or the unmodified data + value if the image cannot be processed. + """ + + logger.debug( + ( + "Processing image data as HTML for display." + " Data value: %s , Bounding box: %s , " + "Compartment center xy: %s, Image path: %s" + ), + data_value, + bounding_box, + compartment_center_xy, + image_path, + ) + + data_value = str(data_value) + cropped_img_array = self._prepare_cropped_image_array( + data_value=data_value, + bounding_box=bounding_box, + compartment_center_xy=compartment_center_xy, + image_path=image_path, + ) + + if cropped_img_array is None: + return data_value + + logger.debug("Image processed successfully and being sent to HTML for display.") + + try: + return self._image_array_to_html(cropped_img_array) + except Exception: + return data_value + def get_displayed_rows(self: CytoDataFrame_type) -> List[int]: """ Get the indices of the rows that are currently @@ -1488,6 +1952,13 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915 self._custom_attrs["data_image_paths"].columns.tolist(), axis=1 ) + ome_arrow_cols = self.find_ome_arrow_columns(data) + if ome_arrow_cols: + for ome_col in ome_arrow_cols: + data.loc[display_indices, ome_col] = data.loc[ + display_indices, ome_col + ].apply(self.process_ome_arrow_data_as_html_display) + if self._custom_attrs["is_transposed"]: # retranspose to return the # data in the shape expected @@ -1576,17 +2047,15 @@ def _repr_html_(self: CytoDataFrame_type, debug: bool = False) -> str: # if we're in a notebook process as though in a jupyter environment if get_option("display.notebook_repr_html") and not debug: - # Mount the VBox (slider + output) exactly once - if not self._custom_attrs["_widget_state"]["shown"]: - display( - widgets.VBox( - [ - self._custom_attrs["_scale_slider"], - self._custom_attrs["_output"], - ] - ) + display( + widgets.VBox( + [ + self._custom_attrs["_scale_slider"], + self._custom_attrs["_output"], + ] ) - self._custom_attrs["_widget_state"]["shown"] = True + ) + self._custom_attrs["_widget_state"]["shown"] = True # Attach the slider observer exactly once if not self._custom_attrs["_widget_state"]["observing"]: diff --git a/tests/test_frame.py b/tests/test_frame.py index 1d9f6fc..7eec24d 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -3,8 +3,12 @@ """ import pathlib +import sys +import types +import imageio.v2 as imageio import nbformat +import numpy as np import pandas as pd import pytest from _pytest.monkeypatch import MonkeyPatch @@ -17,6 +21,232 @@ ) +def test_to_ome_parquet_adds_arrow_column( + tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch +) -> None: + image_dir = tmp_path / "images" + image_dir.mkdir() + image_path = image_dir / "sample.tiff" + imageio.imwrite(image_path, np.zeros((10, 10), dtype=np.uint8)) + + data = pd.DataFrame( + { + "Image_FileName_DNA": [image_path.name], + "Image_PathName_DNA": [str(image_dir)], + "Cells_AreaShape_BoundingBoxMinimum_X": [0], + "Cells_AreaShape_BoundingBoxMinimum_Y": [0], + "Cells_AreaShape_BoundingBoxMaximum_X": [10], + "Cells_AreaShape_BoundingBoxMaximum_Y": [10], + } + ) + + cdf = CytoDataFrame(data=data) + + class TestOMEArrow: + def __init__(self, data: str): # noqa: ANN204 + self.data = data + + test_module = types.SimpleNamespace( + OMEArrow=TestOMEArrow, + __version__="test", + __spec__=types.SimpleNamespace(loader=None), + ) + monkeypatch.setitem(sys.modules, "ome_arrow", test_module) + + captured: dict = {} + + def fake_write_table(table, file_path, **kwargs): # noqa: ANN001, ANN202, ANN003 + captured["df"] = table.to_pandas() + captured["file_path"] = file_path + captured["kwargs"] = kwargs + captured["metadata"] = table.schema.metadata or {} + + monkeypatch.setattr("pyarrow.parquet.write_table", fake_write_table, raising=False) + + output_path = tmp_path / "out.parquet" + cdf.to_ome_parquet(output_path) + + composite_col = "Image_FileName_DNA_OMEArrow_COMP" + orig_col = "Image_FileName_DNA_OMEArrow_ORIG" + mask_col = "Image_FileName_DNA_OMEArrow_LABL" + for column in (composite_col, orig_col, mask_col): + assert column in captured["df"].columns + + comp_value = captured["df"].loc[0, composite_col] + orig_value = captured["df"].loc[0, orig_col] + mask_value = captured["df"].loc[0, mask_col] + + assert isinstance(comp_value, str) and comp_value.endswith(".tiff") + assert isinstance(orig_value, str) and orig_value.endswith(".tiff") + assert mask_value is None + assert captured["file_path"] == output_path + metadata = captured["metadata"] + assert metadata[b"cytodataframe:data-producer"] + assert metadata[b"cytodataframe:data-producer-version"] + + +def test_to_ome_parquet_real_data( + tmp_path: pathlib.Path, cytotable_NF1_data_parquet_shrunken: str +) -> None: + pytest.importorskip( + "ome_arrow", reason="to_ome_parquet real-data test requires ome-arrow" + ) + + parquet_path = pathlib.Path(cytotable_NF1_data_parquet_shrunken) + image_dir = parquet_path.parent / "Plate_2_images" + mask_dir = parquet_path.parent / "Plate_2_masks" + + cdf = CytoDataFrame( + data=cytotable_NF1_data_parquet_shrunken, + data_context_dir=str(image_dir), + data_mask_context_dir=str(mask_dir), + ) + + output_path = tmp_path / "nf1.ome.parquet" + image_cols = cdf.find_image_columns() + + cdf.to_ome_parquet(output_path) + + assert output_path.exists() + table = parquet.read_table(output_path) + expected_arrow_cols = [] + for col in image_cols: + expected_arrow_cols.extend( + [ + f"{col}_OMEArrow_COMP", + f"{col}_OMEArrow_ORIG", + f"{col}_OMEArrow_LABL", + ] + ) + for column in expected_arrow_cols: + assert column in table.column_names + + mask_cols = [f"{col}_OMEArrow_LABL" for col in image_cols] + mask_df = table.select(mask_cols).to_pandas() + assert mask_df.notna().any().any() + + +def test_to_ome_parquet_layer_flags( + tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch +) -> None: + image_dir = tmp_path / "images" + image_dir.mkdir() + image_path = image_dir / "sample.tiff" + imageio.imwrite(image_path, np.zeros((10, 10), dtype=np.uint8)) + + data = pd.DataFrame( + { + "Image_FileName_DNA": [image_path.name], + "Image_PathName_DNA": [str(image_dir)], + "Cells_AreaShape_BoundingBoxMinimum_X": [0], + "Cells_AreaShape_BoundingBoxMinimum_Y": [0], + "Cells_AreaShape_BoundingBoxMaximum_X": [10], + "Cells_AreaShape_BoundingBoxMaximum_Y": [10], + } + ) + + cdf = CytoDataFrame(data=data) + + class TestOMEArrow: + def __init__(self, data: str): # noqa: ANN204 + self.data = data + + test_module = types.SimpleNamespace( + OMEArrow=TestOMEArrow, + __version__="test", + __spec__=types.SimpleNamespace(loader=None), + ) + monkeypatch.setitem(sys.modules, "ome_arrow", test_module) + + captured: dict = {} + + def fake_write_table(table, file_path, **kwargs): # noqa: ANN001, ANN202, ANN003 + captured["df"] = table.to_pandas() + + monkeypatch.setattr("pyarrow.parquet.write_table", fake_write_table, raising=False) + + cdf.to_ome_parquet( + tmp_path / "out.parquet", + include_original=False, + include_mask_outline=False, + include_composite=True, + ) + + columns = captured["df"].columns + assert "Image_FileName_DNA_OMEArrow_COMP" in columns + assert "Image_FileName_DNA_OMEArrow_ORIG" not in columns + assert "Image_FileName_DNA_OMEArrow_LABL" not in columns + + +def test_ome_arrow_columns_render_html( + tmp_path: pathlib.Path, cytotable_NF1_data_parquet_shrunken: str +) -> None: + pytest.importorskip( + "ome_arrow", reason="OME-Arrow rendering test requires ome-arrow" + ) + + parquet_path = pathlib.Path(cytotable_NF1_data_parquet_shrunken) + image_dir = parquet_path.parent / "Plate_2_images" + mask_dir = parquet_path.parent / "Plate_2_masks" + + raw_cdf = CytoDataFrame( + data=cytotable_NF1_data_parquet_shrunken, + data_context_dir=str(image_dir), + data_mask_context_dir=str(mask_dir), + ) + + ome_path = tmp_path / "nf1.arrow.parquet" + raw_cdf.to_ome_parquet(ome_path) + + arrow_cdf = CytoDataFrame(data=ome_path) + arrow_cols = [col for col in arrow_cdf.columns if col.endswith("_OMEArrow_COMP")] + assert arrow_cols + + html_output = arrow_cdf[arrow_cols]._repr_html_(debug=True) + assert "data:image/png;base64" in html_output + + +def test_prepare_layers_mask_binary(tmp_path: pathlib.Path) -> None: + image_array = np.zeros((6, 6), dtype=np.uint8) + image_path = tmp_path / "cell.tiff" + imageio.imwrite(image_path, image_array) + + mask_array = np.zeros((6, 6, 3), dtype=np.uint8) + mask_array[1:4, 1:4] = (0, 255, 0) + mask_path = tmp_path / "cell_mask.png" + imageio.imwrite(mask_path, mask_array) + + data = pd.DataFrame( + { + "Image_FileName_DNA": ["cell.tiff"], + "Image_PathName_DNA": [str(tmp_path)], + "Cells_AreaShape_BoundingBoxMinimum_X": [0], + "Cells_AreaShape_BoundingBoxMinimum_Y": [0], + "Cells_AreaShape_BoundingBoxMaximum_X": [6], + "Cells_AreaShape_BoundingBoxMaximum_Y": [6], + } + ) + + cdf = CytoDataFrame( + data=data, + data_context_dir=str(tmp_path), + data_mask_context_dir=str(tmp_path), + ) + + layers = cdf._prepare_cropped_image_layers( + data_value="cell.tiff", + bounding_box=(0, 0, 6, 6), + include_mask_outline=True, + include_original=False, + include_composite=False, + ) + + mask_layer = layers["mask"] + assert mask_layer is not None + assert mask_layer.dtype == np.uint8 + assert set(np.unique(mask_layer).tolist()).issubset({0, 255}) + + def test_cytodataframe_input( tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame, @@ -399,7 +629,7 @@ def test_slider_updates_state(monkeypatch: MonkeyPatch): widget state and triggers the render method. """ - # Minimal dummy dataframe + # Minimal test dataframe df = pd.DataFrame({"Image_FileName_DNA": ["example.tif"]}) cdf = CytoDataFrame(df)