Merge pull request #86 from bsipocz/euclid_cleanup_more_astropy

bsipocz · web-flow · commit 7ea238aa46cf · 2025-03-31T21:53:44.000-07:00
ENH: More cleanups for euclid notebooks: remove bytesIO and astropy-ify notebook 1&5
diff --git a/tutorials/euclid_access/1_Euclid_intro_MER_images.md b/tutorials/euclid_access/1_Euclid_intro_MER_images.md
@@ -65,7 +65,6 @@ Each MER image is approximately 1.47 GB. Downloading can take some time.
 import re
 
 import numpy as np
-import pandas as pd
 
 import matplotlib.pyplot as plt
 from matplotlib.patches import Ellipse
@@ -80,9 +79,6 @@ from astropy import units as u
 
 from astroquery.ipac.irsa import Irsa
 import sep
-
-# Copy-on-write is more performant and avoids unexpected modifications of the original DataFrame.
-pd.options.mode.copy_on_write = True
 ```
 
 ## 1. Search for multiwavelength Euclid Q1 MER mosaics that cover the star HD 168151
@@ -123,20 +119,23 @@ science_images
 Note that 'access_estsize' is in units of kB.
 
 ```{code-cell} ipython3
-filename = science_images[science_images['energy_bandpassname']=='VIS']['access_url'][0]
-filesize = science_images[science_images['energy_bandpassname']=='VIS']['access_estsize'][0]/1000000
-
+filename = science_images[science_images['energy_bandpassname'] == 'VIS']['access_url'][0]
+filesize = science_images[science_images['energy_bandpassname'] == 'VIS']['access_estsize'][0] / 1000000
 print(filename)
 
 print(f'Please note this image is {filesize} GB. With 230 Mbps internet download speed, it takes about 1 minute to download.')
 ```
 
+```{code-cell} ipython3
+science_images
+```
+
 ### Extract the tileID of this image from the filename
 
 ```{code-cell} ipython3
-tileID=re.search(r'TILE\s*(\d{9})', filename).group(1)
+tileID = science_images[science_images['energy_bandpassname'] == 'VIS']['obs_id'][0][:9]
 
-print('The MER tile ID for this object is :',tileID)
+print(f'The MER tile ID for this object is : {tileID}')
 ```
 
 Retrieve the MER image -- note this file is about 1.46 GB
@@ -146,7 +145,7 @@ fname = download_file(filename, cache=True)
 hdu_mer_irsa = fits.open(fname)
 print(hdu_mer_irsa.info())
 
-head_mer_irsa = hdu_mer_irsa[0].header
+header_mer_irsa = hdu_mer_irsa[0].header
 ```
 
 If you would like to save the MER mosaic to disk, uncomment the following cell.
@@ -160,21 +159,22 @@ Please also define a suitable download directory; by default it will be `data` a
 Have a look at the header information for this image.
 
 ```{code-cell} ipython3
-head_mer_irsa
+header_mer_irsa
 ```
 
 Let's extract just the primary image.
 
 ```{code-cell} ipython3
-im_mer_irsa=hdu_mer_irsa[0].data
+im_mer_irsa = hdu_mer_irsa[0].data
 
 print(im_mer_irsa.shape)
 ```
 
 Due to the large field of view of the MER mosaic, let's cut out a smaller section (2"x2") of the MER mosaic to inspect the image.
 
 ```{code-cell} ipython3
-plt.imshow(im_mer_irsa[0:1200,0:1200], cmap='gray', origin='lower', norm=ImageNormalize(im_mer_irsa[0:1200,0:1200], interval=PercentileInterval(99.9), stretch=AsinhStretch()))
+plt.imshow(im_mer_irsa[0:1200,0:1200], cmap='gray', origin='lower',
+           norm=ImageNormalize(im_mer_irsa[0:1200,0:1200], interval=PercentileInterval(99.9), stretch=AsinhStretch()))
 colorbar = plt.colorbar()
 ```
 
@@ -203,21 +203,20 @@ urls
 Create an array with the instrument and filter name so we can add this to the plots.
 
 ```{code-cell} ipython3
-df_im_euclid.loc[:, "filters"] = df_im_euclid["instrument_name"] + "_" + df_im_euclid["energy_bandpassname"]
+science_images['filters'] = science_images['instrument_name'] + "_" + science_images['energy_bandpassname']
 
-## Note that VIS_VIS appears in the filters, so update that filter to just say VIS
-df_im_euclid.loc[df_im_euclid["filters"] == "VIS_VIS", "filters"] = "VIS"
+# VIS_VIS appears in the filters, so update that filter to just say VIS
+science_images['filters'][science_images['filters']== 'VIS_VIS'] = "VIS"
 
-filters = df_im_euclid['filters'].to_numpy()
-filters
+science_images['filters']
 ```
 
 ## The image above is very large, so let's cut out a smaller image to inspect these data.
 
 ```{code-cell} ipython3
 ######################## User defined section ############################
 ## How large do you want the image cutout to be?
-im_cutout= 1.0 * u.arcmin
+im_cutout = 1.0 * u.arcmin
 
 ## What is the center of the cutout?
 ## For now choosing a random location on the image
@@ -229,7 +228,7 @@ dec =  64.525
 # ra = 273.474451
 # dec = 64.397273
 
-coords_cutout = SkyCoord(ra, dec, unit=(u.deg, u.deg), frame='icrs')
+coords_cutout = SkyCoord(ra, dec, unit='deg', frame='icrs')
 
 ##########################################################################
 
@@ -275,7 +274,7 @@ rows = -(-num_images // columns)
 fig, axes = plt.subplots(rows, columns, figsize=(4 * columns, 4 * rows), subplot_kw={'projection': WCS(final_hdulist[0].header)})
 axes = axes.flatten()
 
-for idx, (ax, filt) in enumerate(zip(axes, filters)):
+for idx, (ax, filt) in enumerate(zip(axes, science_images['filters'])):
     image_data = final_hdulist[idx].data
     norm = ImageNormalize(image_data, interval=PercentileInterval(99.9), stretch=AsinhStretch())
     ax.imshow(image_data, cmap='gray', origin='lower', norm=norm)
@@ -296,13 +295,9 @@ plt.show()
 First we list all the filters so you can choose which cutout you want to extract sources on. We will choose VIS.
 
 ```{code-cell} ipython3
-filters
-```
-
-```{code-cell} ipython3
-filt_index = np.where(filters == 'VIS')[0][0]
+filt_index = np.where(science_images['filters'] == 'VIS')[0][0]
 
-img1=final_hdulist[filt_index].data
+img1 = final_hdulist[filt_index].data
 ```
 
 ### Extract some sources from the cutout using sep (python package based on source extractor)
@@ -386,8 +381,8 @@ for i in range(len(sources_thr)):
 
 ## About this Notebook
 
-**Author**: Tiffany Meshkat (IPAC Scientist)
+**Author**: Tiffany Meshkat, Anahita Alavi, Anastasia Laity, Andreas Faisst, Brigitta Sipőcz, Dan Masters, Harry Teplitz, Jaladh Singhal, Shoubaneh Hemmati, Vandana Desai
 
-**Updated**: 2025-03-19
+**Updated**: 2025-03-31
 
 **Contact:** [the IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems.
diff --git a/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md b/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md
@@ -113,13 +113,13 @@ Open the large FITS file without loading it entirely into memory, pulling out ju
 
 ```{code-cell} ipython3
 with fits.open(file_uri) as hdul:
-    spectra = QTable.read(hdul[result['hdu'][0]], format='fits')
+    spectrum = QTable.read(hdul[result['hdu'][0]], format='fits')
 
     spec_header = hdul[result['hdu'][0]].header
 ```
 
 ```{code-cell} ipython3
-spectra
+spectrum
 ```
 
 ```{code-cell} ipython3
@@ -145,25 +145,25 @@ The 1D combined spectra table contains 6 columns, below are a few highlights:
 ```
 
 ```{code-cell} ipython3
-signal_scaled = spectra['SIGNAL'] * spec_header['FSCALE']
+signal_scaled = spectrum['SIGNAL'] * spec_header['FSCALE']
 ```
 
 We investigate the MASK column to see which flux bins are recommended to keep vs "Do Not Use"
 
 ```{code-cell} ipython3
-plt.plot(spectra['WAVELENGTH'].to(u.micron), spectra['MASK'])
+plt.plot(spectrum['WAVELENGTH'].to(u.micron), spectrum['MASK'])
 plt.ylabel('Mask value')
 plt.title('Values of MASK by flux bin')
 ```
 
 We use the MASK column to create a boolean mask for values to ignore. We use the inverse of this mask to mark the flux bins to use.
 
 ```{code-cell} ipython3
-bad_mask = (spectra['MASK'].value % 2 == 1) | (spectra['MASK'].value >= 64)
+bad_mask = (spectrum['MASK'].value % 2 == 1) | (spectrum['MASK'].value >= 64)
 
-plt.plot(spectra['WAVELENGTH'].to(u.micron), np.ma.masked_where(bad_mask, signal_scaled), color='black', label='Spectrum')
-plt.plot(spectra['WAVELENGTH'], np.ma.masked_where(~bad_mask, signal_scaled), color='red', label='Do not use')
-plt.plot(spectra['WAVELENGTH'], np.sqrt(spectra['VAR']) * spec_header['FSCALE'], color='grey', label='Error')
+plt.plot(spectrum['WAVELENGTH'].to(u.micron), np.ma.masked_where(bad_mask, signal_scaled), color='black', label='Spectrum')
+plt.plot(spectrum['WAVELENGTH'], np.ma.masked_where(~bad_mask, signal_scaled), color='red', label='Do not use')
+plt.plot(spectrum['WAVELENGTH'], np.sqrt(spectrum['VAR']) * spec_header['FSCALE'], color='grey', label='Error')
 
 plt.legend(loc='upper right')
 plt.ylim(-0.15E-16, 0.25E-16)
diff --git a/tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md b/tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md
@@ -51,15 +51,14 @@ If you have questions about this notebook, please contact the [IRSA helpdesk](ht
 
 ```{code-cell} ipython3
 # Uncomment the next line to install dependencies if needed.
-# !pip install requests matplotlib pandas 'astropy>=5.3' 'astroquery>=0.4.10' fsspec firefly_client
+# !pip install matplotlib pandas 'astropy>=5.3' 'astroquery>=0.4.10' fsspec firefly_client
 ```
 
 ```{code-cell} ipython3
-from io import BytesIO
 import os
 import re
+import urllib
 
-import requests
 import matplotlib.pyplot as plt
 
 from astropy.coordinates import SkyCoord
@@ -324,20 +323,15 @@ df2=result2.to_table().to_pandas()
 df2
 ```
 
-```{code-cell} ipython3
-## Create the full filename/url
-irsa_url='https://irsa.ipac.caltech.edu/'
+Pull out the file name from the ``result2`` table:
 
-file_url=irsa_url+df2['uri'].iloc[0]
-file_url
+```{code-cell} ipython3
+file_uri = urllib.parse.urljoin(Irsa.tap_url, result2['uri'][0])
+file_uri
 ```
 
 ```{code-cell} ipython3
-## Open the large FITS file without loading it entirely into memory
-## pulling out just the extension we want for the 1D spectra of our object
-response = requests.get(file_url)
-
-with fits.open(BytesIO(response.content), memmap=True) as hdul:
+with fits.open(file_uri) as hdul:
     hdu = hdul[df2['hdu'].iloc[0]]
     dat = Table.read(hdu, format='fits', hdu=1)
     df_obj_irsa = dat.to_pandas()
diff --git a/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md b/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md