Skip to content

Commit 82f018f

Browse files
authored
Upload changes to photoz notebook (#1236)
1 parent 3fa14b4 commit 82f018f

File tree

1 file changed

+137
-18
lines changed

1 file changed

+137
-18
lines changed

docs/tutorials/pre_executed/rubin_dp1_photoz.ipynb

Lines changed: 137 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"\n",
1616
"In this tutorial, we will:\n",
1717
"- access a photo-z catalog derived from Rubin’s Data Preview 1 using LSDB (for data rights holders)\n",
18-
"- access the same catalog using pandas or any other Parquet reader (for all users)"
18+
"- access the same catalog using pandas or any other Parquet reader (for all users)\n",
19+
"- reconstruct the `qp.Ensemble` from the PDF nested columns"
1920
]
2021
},
2122
{
@@ -40,11 +41,11 @@
4041
"id": "ed4c15e1",
4142
"metadata": {
4243
"execution": {
43-
"iopub.execute_input": "2026-01-07T21:57:50.902229Z",
44-
"iopub.status.busy": "2026-01-07T21:57:50.901979Z",
45-
"iopub.status.idle": "2026-01-07T21:57:56.750842Z",
46-
"shell.execute_reply": "2026-01-07T21:57:56.750150Z",
47-
"shell.execute_reply.started": "2026-01-07T21:57:50.902209Z"
44+
"iopub.execute_input": "2026-01-29T19:36:21.186555Z",
45+
"iopub.status.busy": "2026-01-29T19:36:21.186365Z",
46+
"iopub.status.idle": "2026-01-29T19:36:29.010010Z",
47+
"shell.execute_reply": "2026-01-29T19:36:29.009172Z",
48+
"shell.execute_reply.started": "2026-01-29T19:36:21.186537Z"
4849
}
4950
},
5051
"outputs": [
@@ -606,11 +607,11 @@
606607
"id": "5bc65fbf",
607608
"metadata": {
608609
"execution": {
609-
"iopub.execute_input": "2026-01-07T21:57:58.183201Z",
610-
"iopub.status.busy": "2026-01-07T21:57:58.182757Z",
611-
"iopub.status.idle": "2026-01-07T21:58:05.427771Z",
612-
"shell.execute_reply": "2026-01-07T21:58:05.427212Z",
613-
"shell.execute_reply.started": "2026-01-07T21:57:58.183166Z"
610+
"iopub.execute_input": "2026-01-29T19:36:29.013280Z",
611+
"iopub.status.busy": "2026-01-29T19:36:29.012961Z",
612+
"iopub.status.idle": "2026-01-29T19:36:40.715891Z",
613+
"shell.execute_reply": "2026-01-29T19:36:40.715202Z",
614+
"shell.execute_reply.started": "2026-01-29T19:36:29.013247Z"
614615
}
615616
},
616617
"outputs": [
@@ -858,11 +859,11 @@
858859
"id": "e82467ec",
859860
"metadata": {
860861
"execution": {
861-
"iopub.execute_input": "2026-01-07T21:58:10.409295Z",
862-
"iopub.status.busy": "2026-01-07T21:58:10.409023Z",
863-
"iopub.status.idle": "2026-01-07T21:58:10.563053Z",
864-
"shell.execute_reply": "2026-01-07T21:58:10.562533Z",
865-
"shell.execute_reply.started": "2026-01-07T21:58:10.409276Z"
862+
"iopub.execute_input": "2026-01-29T19:36:40.718156Z",
863+
"iopub.status.busy": "2026-01-29T19:36:40.717943Z",
864+
"iopub.status.idle": "2026-01-29T19:36:40.869807Z",
865+
"shell.execute_reply": "2026-01-29T19:36:40.869230Z",
866+
"shell.execute_reply.started": "2026-01-29T19:36:40.718137Z"
866867
}
867868
},
868869
"outputs": [
@@ -899,16 +900,134 @@
899900
"plt.ylabel(\"kNN\")"
900901
]
901902
},
903+
{
904+
"cell_type": "markdown",
905+
"id": "1f86d413",
906+
"metadata": {},
907+
"source": [
908+
"## 3. Reconstructing a QP Ensemble\n",
909+
"\n",
910+
"Here we demonstrate how to reconstruct a dictionary of `qp.Ensemble` objects from the HATS catalog.\n",
911+
"\n",
912+
"Based on the suffix of each nested column name (e.g., `interp`, `mixmod`, `norm`, `hist`, or a quantile count such as `99`, `20`, or `5`), we parse the nested data fields and rebuild each ensemble using the appropriate `qp` generator."
913+
]
914+
},
915+
{
916+
"cell_type": "code",
917+
"execution_count": 4,
918+
"id": "721b27c6-6dea-456d-8304-d8f297a1d60f",
919+
"metadata": {
920+
"execution": {
921+
"iopub.execute_input": "2026-01-29T19:36:40.872245Z",
922+
"iopub.status.busy": "2026-01-29T19:36:40.872024Z",
923+
"iopub.status.idle": "2026-01-29T19:36:41.327487Z",
924+
"shell.execute_reply": "2026-01-29T19:36:41.326924Z",
925+
"shell.execute_reply.started": "2026-01-29T19:36:40.872219Z"
926+
}
927+
},
928+
"outputs": [],
929+
"source": [
930+
"import qp\n",
931+
"import numpy as np\n",
932+
"\n",
933+
"\n",
934+
"def hats_to_qp(df):\n",
935+
" \"\"\"Reconstruct `qp.Ensemble` objects from a partition.\"\"\"\n",
936+
" ensembles = {}\n",
937+
"\n",
938+
" def extract(nf, subcol):\n",
939+
" return np.asarray([i[subcol] for i in nf])\n",
940+
"\n",
941+
" for col in df.nested_columns:\n",
942+
" nf = df[col]\n",
943+
" data_dict = {}\n",
944+
" ens_type = col.split(\"_\")[-1]\n",
945+
"\n",
946+
" match ens_type:\n",
947+
" case \"interp\":\n",
948+
" data_dict[\"yvals\"] = extract(nf, \"yvals\")\n",
949+
" data_dict[\"xvals\"] = np.asarray(nf.iloc[-1][\"xvals\"])\n",
950+
" gen_class = qp.interp_gen\n",
951+
" case \"mixmod\":\n",
952+
" data_dict[\"means\"] = extract(nf, \"means\")\n",
953+
" data_dict[\"stds\"] = extract(nf, \"stds\")\n",
954+
" data_dict[\"weights\"] = extract(nf, \"weights\")\n",
955+
" gen_class = qp.mixmod_gen\n",
956+
" case \"norm\":\n",
957+
" data_dict[\"loc\"] = extract(nf, \"loc\")\n",
958+
" data_dict[\"scale\"] = extract(nf, \"scale\")\n",
959+
" ensembles[col] = qp.Ensemble(qp.stats.norm, data=data_dict)\n",
960+
" gen_class = qp.stats.norm\n",
961+
" case \"hist\":\n",
962+
" data_dict[\"pdfs\"] = extract(nf, \"pdfs\")\n",
963+
" data_dict[\"bins\"] = np.linspace(0, 3, 301)\n",
964+
" gen_class = qp.hist_gen\n",
965+
" case \"99\" | \"20\" | \"5\":\n",
966+
" data_dict[\"locs\"] = extract(nf, \"locs\")\n",
967+
" data_dict[\"quants\"] = np.asarray(nf.iloc[-1][\"quants\"])\n",
968+
" gen_class = qp.quant_gen\n",
969+
" case _:\n",
970+
" continue\n",
971+
"\n",
972+
" ensembles[col] = qp.Ensemble(gen_class, data=data_dict)\n",
973+
"\n",
974+
" return ensembles"
975+
]
976+
},
977+
{
978+
"cell_type": "markdown",
979+
"id": "4cb0f989-18e9-4ab0-bb0e-272de609e211",
980+
"metadata": {},
981+
"source": [
982+
"Note that constructing the `qp.Ensemble` objects is computationally expensive. For demonstration purposes, we only use a handful of objects."
983+
]
984+
},
985+
{
986+
"cell_type": "code",
987+
"execution_count": 5,
988+
"id": "7b48b4b0-208c-4346-8ab6-1017f2072593",
989+
"metadata": {
990+
"execution": {
991+
"iopub.execute_input": "2026-01-29T19:36:41.330241Z",
992+
"iopub.status.busy": "2026-01-29T19:36:41.330050Z",
993+
"iopub.status.idle": "2026-01-29T19:36:48.725613Z",
994+
"shell.execute_reply": "2026-01-29T19:36:48.725023Z",
995+
"shell.execute_reply.started": "2026-01-29T19:36:41.330224Z"
996+
}
997+
},
998+
"outputs": [
999+
{
1000+
"data": {
1001+
"text/plain": [
1002+
"{'bpz_ens_interp': Ensemble(the_class=interp,shape=(5, 301)),\n",
1003+
" 'cmnn_ens_norm': Ensemble(the_class=norm,shape=(5, 1)),\n",
1004+
" 'dnf_ens_interp': Ensemble(the_class=interp,shape=(5, 301)),\n",
1005+
" 'fzboost_ens_interp': Ensemble(the_class=interp,shape=(5, 301)),\n",
1006+
" 'knn_ens_mixmod': Ensemble(the_class=mixmod,shape=(5, 10)),\n",
1007+
" 'lephare_ens_interp': Ensemble(the_class=interp,shape=(5, 301)),\n",
1008+
" 'tpz_ens_interp': Ensemble(the_class=interp,shape=(5, 301)),\n",
1009+
" 'gpz_ens_norm': Ensemble(the_class=norm,shape=(5, 1))}"
1010+
]
1011+
},
1012+
"execution_count": 5,
1013+
"metadata": {},
1014+
"output_type": "execute_result"
1015+
}
1016+
],
1017+
"source": [
1018+
"hats_to_qp(dp1_pz_catalog.head())"
1019+
]
1020+
},
9021021
{
9031022
"cell_type": "markdown",
9041023
"id": "7328abf3-7443-49a9-ac88-59b792f1b783",
9051024
"metadata": {},
9061025
"source": [
9071026
"## About\n",
9081027
"\n",
909-
"**Authors**: Sandro Campos\n",
1028+
"**Authors**: Sandro Campos, Sarah Pelesky, Tianqing Zhang\n",
9101029
"\n",
911-
"**Last run**: Jan 7, 2026\n",
1030+
"**Last run**: Jan 29, 2026\n",
9121031
"\n",
9131032
"If you use `lsdb` for published research, please cite it following these [instructions](https://docs.lsdb.io/en/stable/citation.html)."
9141033
]

0 commit comments

Comments
 (0)