diff --git a/tutorials/euclid_access/1_Euclid_intro_MER_images.md b/tutorials/euclid_access/1_Euclid_intro_MER_images.md index 73544f5a..cf8f3161 100644 --- a/tutorials/euclid_access/1_Euclid_intro_MER_images.md +++ b/tutorials/euclid_access/1_Euclid_intro_MER_images.md @@ -65,7 +65,6 @@ Each MER image is approximately 1.47 GB. Downloading can take some time. import re import numpy as np -import pandas as pd import matplotlib.pyplot as plt from matplotlib.patches import Ellipse @@ -80,9 +79,6 @@ from astropy import units as u from astroquery.ipac.irsa import Irsa import sep - -# Copy-on-write is more performant and avoids unexpected modifications of the original DataFrame. -pd.options.mode.copy_on_write = True ``` ## 1. Search for multiwavelength Euclid Q1 MER mosaics that cover the star HD 168151 @@ -123,20 +119,23 @@ science_images Note that 'access_estsize' is in units of kb ```{code-cell} ipython3 -filename = science_images[science_images['energy_bandpassname']=='VIS']['access_url'][0] -filesize = science_images[science_images['energy_bandpassname']=='VIS']['access_estsize'][0]/1000000 - +filename = science_images[science_images['energy_bandpassname'] == 'VIS']['access_url'][0] +filesize = science_images[science_images['energy_bandpassname'] == 'VIS']['access_estsize'][0] / 1000000 print(filename) print(f'Please note this image is {filesize} GB. With 230 Mbps internet download speed, it takes about 1 minute to download.') ``` +```{code-cell} ipython3 +science_images +``` + ### Extract the tileID of this image from the filename ```{code-cell} ipython3 -tileID=re.search(r'TILE\s*(\d{9})', filename).group(1) +tileID = science_images[science_images['energy_bandpassname'] == 'VIS']['obs_id'][0][:9] -print('The MER tile ID for this object is :',tileID) +print(f'The MER tile ID for this object is : {tileID}') ``` Retrieve the MER image -- note this file is about 1.46 GB @@ -146,7 +145,7 @@ fname = download_file(filename, cache=True) hdu_mer_irsa = fits.open(fname) print(hdu_mer_irsa.info()) -head_mer_irsa = hdu_mer_irsa[0].header +header_mer_irsa = hdu_mer_irsa[0].header ``` If you would like to save the MER mosaic to disk, uncomment the following cell. @@ -160,13 +159,13 @@ Please also define a suitable download directory; by default it will be `data` a Have a look at the header information for this image. ```{code-cell} ipython3 -head_mer_irsa +header_mer_irsa ``` Lets extract just the primary image. ```{code-cell} ipython3 -im_mer_irsa=hdu_mer_irsa[0].data +im_mer_irsa = hdu_mer_irsa[0].data print(im_mer_irsa.shape) ``` @@ -174,7 +173,8 @@ print(im_mer_irsa.shape) Due to the large field of view of the MER mosaic, let's cut out a smaller section (2"x2")of the MER mosaic to inspect the image ```{code-cell} ipython3 -plt.imshow(im_mer_irsa[0:1200,0:1200], cmap='gray', origin='lower', norm=ImageNormalize(im_mer_irsa[0:1200,0:1200], interval=PercentileInterval(99.9), stretch=AsinhStretch())) +plt.imshow(im_mer_irsa[0:1200,0:1200], cmap='gray', origin='lower', + norm=ImageNormalize(im_mer_irsa[0:1200,0:1200], interval=PercentileInterval(99.9), stretch=AsinhStretch())) colorbar = plt.colorbar() ``` @@ -203,13 +203,12 @@ urls Create an array with the instrument and filter name so we can add this to the plots. ```{code-cell} ipython3 -df_im_euclid.loc[:, "filters"] = df_im_euclid["instrument_name"] + "_" + df_im_euclid["energy_bandpassname"] +science_images['filters'] = science_images['instrument_name'] + "_" + science_images['energy_bandpassname'] -## Note that VIS_VIS appears in the filters, so update that filter to just say VIS -df_im_euclid.loc[df_im_euclid["filters"] == "VIS_VIS", "filters"] = "VIS" +# VIS_VIS appears in the filters, so update that filter to just say VIS +science_images['filters'][science_images['filters']== 'VIS_VIS'] = "VIS" -filters = df_im_euclid['filters'].to_numpy() -filters +science_images['filters'] ``` ## The image above is very large, so let's cut out a smaller image to inspect these data. @@ -217,7 +216,7 @@ filters ```{code-cell} ipython3 ######################## User defined section ############################ ## How large do you want the image cutout to be? -im_cutout= 1.0 * u.arcmin +im_cutout = 1.0 * u.arcmin ## What is the center of the cutout? ## For now choosing a random location on the image @@ -229,7 +228,7 @@ dec = 64.525 # ra = 273.474451 # dec = 64.397273 -coords_cutout = SkyCoord(ra, dec, unit=(u.deg, u.deg), frame='icrs') +coords_cutout = SkyCoord(ra, dec, unit='deg', frame='icrs') ########################################################################## @@ -275,7 +274,7 @@ rows = -(-num_images // columns) fig, axes = plt.subplots(rows, columns, figsize=(4 * columns, 4 * rows), subplot_kw={'projection': WCS(final_hdulist[0].header)}) axes = axes.flatten() -for idx, (ax, filt) in enumerate(zip(axes, filters)): +for idx, (ax, filt) in enumerate(zip(axes, science_images['filters'])): image_data = final_hdulist[idx].data norm = ImageNormalize(image_data, interval=PercentileInterval(99.9), stretch=AsinhStretch()) ax.imshow(image_data, cmap='gray', origin='lower', norm=norm) @@ -296,13 +295,9 @@ plt.show() First we list all the filters so you can choose which cutout you want to extract sources on. We will choose VIS. ```{code-cell} ipython3 -filters -``` - -```{code-cell} ipython3 -filt_index = np.where(filters == 'VIS')[0][0] +filt_index = np.where(science_images['filters'] == 'VIS')[0][0] -img1=final_hdulist[filt_index].data +img1 = final_hdulist[filt_index].data ``` ### Extract some sources from the cutout using sep (python package based on source extractor) @@ -386,8 +381,8 @@ for i in range(len(sources_thr)): ## About this Notebook -**Author**: Tiffany Meshkat (IPAC Scientist) +**Author**: Tiffany Meshkat, Anahita Alavi, Anastasia Laity, Andreas Faisst, Brigitta Sipőcz, Dan Masters, Harry Teplitz, Jaladh Singhal, Shoubaneh Hemmati, Vandana Desai -**Updated**: 2025-03-19 +**Updated**: 2025-03-31 **Contact:** [the IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems. diff --git a/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md b/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md index 801620e2..fcf252e7 100644 --- a/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md +++ b/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md @@ -113,13 +113,13 @@ Open the large FITS file without loading it entirely into memory, pulling out ju ```{code-cell} ipython3 with fits.open(file_uri) as hdul: - spectra = QTable.read(hdul[result['hdu'][0]], format='fits') + spectrum = QTable.read(hdul[result['hdu'][0]], format='fits') spec_header = hdul[result['hdu'][0]].header ``` ```{code-cell} ipython3 -spectra +spectrum ``` ```{code-cell} ipython3 @@ -145,13 +145,13 @@ The 1D combined spectra table contains 6 columns, below are a few highlights: ``` ```{code-cell} ipython3 -signal_scaled = spectra['SIGNAL'] * spec_header['FSCALE'] +signal_scaled = spectrum['SIGNAL'] * spec_header['FSCALE'] ``` We investigate the MASK column to see which flux bins are recommended to keep vs "Do Not Use" ```{code-cell} ipython3 -plt.plot(spectra['WAVELENGTH'].to(u.micron), spectra['MASK']) +plt.plot(spectrum['WAVELENGTH'].to(u.micron), spectrum['MASK']) plt.ylabel('Mask value') plt.title('Values of MASK by flux bin') ``` @@ -159,11 +159,11 @@ plt.title('Values of MASK by flux bin') We use the MASK column to create a boolean mask for values to ignore. We use the inverse of this mask to mark the flux bins to use. ```{code-cell} ipython3 -bad_mask = (spectra['MASK'].value % 2 == 1) | (spectra['MASK'].value >= 64) +bad_mask = (spectrum['MASK'].value % 2 == 1) | (spectrum['MASK'].value >= 64) -plt.plot(spectra['WAVELENGTH'].to(u.micron), np.ma.masked_where(bad_mask, signal_scaled), color='black', label='Spectrum') -plt.plot(spectra['WAVELENGTH'], np.ma.masked_where(~bad_mask, signal_scaled), color='red', label='Do not use') -plt.plot(spectra['WAVELENGTH'], np.sqrt(spectra['VAR']) * spec_header['FSCALE'], color='grey', label='Error') +plt.plot(spectrum['WAVELENGTH'].to(u.micron), np.ma.masked_where(bad_mask, signal_scaled), color='black', label='Spectrum') +plt.plot(spectrum['WAVELENGTH'], np.ma.masked_where(~bad_mask, signal_scaled), color='red', label='Do not use') +plt.plot(spectrum['WAVELENGTH'], np.sqrt(spectrum['VAR']) * spec_header['FSCALE'], color='grey', label='Error') plt.legend(loc='upper right') plt.ylim(-0.15E-16, 0.25E-16) diff --git a/tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md b/tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md index f0c2d8f2..ad602914 100644 --- a/tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md +++ b/tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md @@ -51,15 +51,14 @@ If you have questions about this notebook, please contact the [IRSA helpdesk](ht ```{code-cell} ipython3 # Uncomment the next line to install dependencies if needed. -# !pip install requests matplotlib pandas 'astropy>=5.3' 'astroquery>=0.4.10' fsspec firefly_client +# !pip install matplotlib pandas 'astropy>=5.3' 'astroquery>=0.4.10' fsspec firefly_client ``` ```{code-cell} ipython3 -from io import BytesIO import os import re +import urllib -import requests import matplotlib.pyplot as plt from astropy.coordinates import SkyCoord @@ -324,20 +323,15 @@ df2=result2.to_table().to_pandas() df2 ``` -```{code-cell} ipython3 -## Create the full filename/url -irsa_url='https://irsa.ipac.caltech.edu/' +Pull out the file name from the ``result`` table: -file_url=irsa_url+df2['uri'].iloc[0] -file_url +```{code-cell} ipython3 +file_uri = urllib.parse.urljoin(Irsa.tap_url, result2['uri'][0]) +file_uri ``` ```{code-cell} ipython3 -## Open the large FITS file without loading it entirely into memory -## pulling out just the extension we want for the 1D spectra of our object -response = requests.get(file_url) - -with fits.open(BytesIO(response.content), memmap=True) as hdul: +with fits.open(file_uri) as hdul: hdu = hdul[df2['hdu'].iloc[0]] dat = Table.read(hdu, format='fits', hdu=1) df_obj_irsa = dat.to_pandas() diff --git a/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md b/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md index fb2b954c..667973ba 100644 --- a/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md +++ b/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md @@ -50,24 +50,22 @@ If you have questions about this notebook, please contact the [IRSA helpdesk](ht ```{code-cell} ipython3 # Uncomment the next line to install dependencies if needed -# !pip install matplotlib pandas astropy 'astroquery>=0.4.10' +# !pip install matplotlib astropy 'astroquery>=0.4.10' ``` ```{code-cell} ipython3 -from io import BytesIO import re +import urllib import matplotlib.pyplot as plt import numpy as np -import pandas as pd -import requests from astropy.coordinates import SkyCoord from astropy.io import fits -from astropy.table import Table +from astropy.table import QTable from astropy import units as u from astropy.utils.data import download_file -from astropy.visualization import ImageNormalize, PercentileInterval, AsinhStretch +from astropy.visualization import ImageNormalize, PercentileInterval, AsinhStretch, quantity_support from astroquery.ipac.irsa import Irsa ``` @@ -81,41 +79,41 @@ search_radius = 10 * u.arcsec coord = SkyCoord.from_name('HD 168151') ``` -### Use IRSA to search for all Euclid data on this target +```{tip} +The IRSA SIA collections can be listed using using the ``list_collections`` method, we can filter on the ones containing "euclid" in the collection name: -This searches specifically in the euclid_DpdMerBksMosaic "collection" which is the MER images and catalogs. + Irsa.list_collections(filter='euclid') +``` -```{code-cell} ipython3 -im_table = Irsa.query_sia(pos=(coord, search_radius), collection='euclid_DpdMerBksMosaic') ++++ -## Convert the table to pandas dataframe -df_im_irsa=im_table.to_pandas() -``` +### Use IRSA to search for all Euclid data on this target + +This searches specifically in the ``euclid_DpdMerBksMosaic`` collection which is the MER images and catalogs. ```{code-cell} ipython3 -## Change the settings so we can see all the columns in the dataframe and the full column width -## (to see the full long URL) -pd.set_option('display.max_columns', None) -pd.set_option('display.max_colwidth', None) +image_table = Irsa.query_sia(pos=(coord, search_radius), collection='euclid_DpdMerBksMosaic') ``` -#### This dataframe contains other non-Euclid datasets that have been "Euclidized", meaning they have been put on the same pixel scale as the Euclid data. For this example we just want to look at the Euclid data, so select Euclid for the facility name, and choose science as the data product subtype. +This table lists all MER mosaic images available in this search position. These mosaics include the Euclid VIS, Y, J, H images, as well as ground-based telescopes which have been put on the same pixel scale. For more information, see the [Euclid documentation at IPAC](https://euclid.caltech.edu/page/euclid-faq-tech/). -```{code-cell} ipython3 -df_im_euclid=df_im_irsa[ (df_im_irsa['dataproduct_subtype']=='science') & (df_im_irsa['facility_name']=='Euclid')] +Note that there are various image types are returned as well, we filter out the `science` images from these: -df_im_euclid.head() +```{code-cell} ipython3 +science_images = image_table[image_table['dataproduct_subtype'] == 'science'] +science_images ``` -## Choose the VIS image and pull the filename: +### Choose the VIS image and pull the Tile ID -```{code-cell} ipython3 -filename=df_im_euclid[df_im_euclid['energy_bandpassname']=='VIS']['access_url'].to_list()[0] ++++ + +Extract the tile ID from the ``obs_id`` column. The values in this column are made a combination of the 9 digit tile ID and the abbreviation of the instrument. -# ## Extract the tileID from the filename -tileID=re.search(r'TILE\s*(\d{9})', filename).group(1) +```{code-cell} ipython3 +tileID = science_images[science_images['energy_bandpassname'] == 'VIS']['obs_id'][0][:9] -print('The MER tile ID for this object is :',tileID) +print(f'The MER tile ID for this object is : {tileID}') ``` ## 2. Download SPE catalog from IRSA directly to this notebook @@ -138,10 +136,14 @@ table_lines = 'euclid_q1_spe_lines_line_features' - List the column names ```{code-cell} ipython3 -columns_info = Irsa.list_columns(catalog=table_lines) +columns_info = Irsa.list_columns(catalog=table_mer) print(len(columns_info)) ``` +```{code-cell} ipython3 +Irsa.list_columns(catalog=table_1dspectra, full=True) +``` + ```{code-cell} ipython3 # Full list of columns and their description columns_info @@ -160,32 +162,29 @@ We specify the following conditions on our search: Finally we sort the data by descending spe_line_snr_gf to have the largest SNR H-alpha lines detected at the top. ```{code-cell} ipython3 -adql = f"SELECT DISTINCT mer.object_id,mer.ra, mer.dec, mer.tileid, mer.flux_y_templfit, \ -lines.spe_line_snr_gf,lines.spe_line_snr_di, lines.spe_line_name, lines.spe_line_central_wl_gf,\ -lines.spe_line_ew_gf, galaxy.spe_z_err, galaxy.spe_z,galaxy.spe_z_prob, lines.spe_line_flux_gf, lines.spe_line_flux_err_gf \ -FROM {table_mer} AS mer \ -JOIN {table_lines} AS lines \ -ON mer.object_id = lines.object_id \ -JOIN {table_galaxy_candidates} AS galaxy \ -ON lines.object_id = galaxy.object_id AND lines.spe_rank = galaxy.spe_rank \ -WHERE lines.spe_line_snr_gf >5 \ -AND lines.spe_line_name = 'Halpha' \ -AND mer.tileid = {tileID} \ -AND galaxy.spe_z_prob > 0.99 \ -AND galaxy.spe_z BETWEEN 1.4 AND 1.6 \ -AND lines.spe_line_flux_gf > 2E-16 \ -ORDER BY lines.spe_line_snr_gf DESC \ -" +adql_query = ("SELECT DISTINCT mer.object_id,mer.ra, mer.dec, mer.tileid, mer.flux_y_templfit, " + "lines.spe_line_snr_gf,lines.spe_line_snr_di, lines.spe_line_name, lines.spe_line_central_wl_gf, " + "lines.spe_line_ew_gf, galaxy.spe_z_err, galaxy.spe_z,galaxy.spe_z_prob, " + "lines.spe_line_flux_gf, lines.spe_line_flux_err_gf " + f"FROM {table_mer} AS mer " + f"JOIN {table_lines} AS lines " + "ON mer.object_id = lines.object_id " + f"JOIN {table_galaxy_candidates} AS galaxy " + "ON lines.object_id = galaxy.object_id AND lines.spe_rank = galaxy.spe_rank " + "WHERE lines.spe_line_snr_gf >5 " + "AND lines.spe_line_name = 'Halpha' " + f"AND mer.tileid = {tileID} " + "AND galaxy.spe_z_prob > 0.99 " + "AND galaxy.spe_z BETWEEN 1.4 AND 1.6 " + "AND lines.spe_line_flux_gf > 2E-16 " + "ORDER BY lines.spe_line_snr_gf DESC ") # Use TAP with this ADQL string -result = Irsa.query_tap(adql) - -# Convert table to pandas dataframe and drop duplicates -result_table = result.to_qtable() +result_table = Irsa.query_tap(adql_query).to_qtable() result_table['spe_line_flux_gf'].info.format = ".8e" # Scientific notation with 8 decimal places result_table['spe_line_flux_err_gf'].info.format = ".8e" -result_table['object_id'] = result['object_id'].astype('int64') +result_table['object_id'] = result_table['object_id'].astype('int64') ``` ### Choose an object of interest, lets look at an object with a strong Halpha line detected with high SNR. @@ -193,9 +192,9 @@ result_table['object_id'] = result['object_id'].astype('int64') ```{code-cell} ipython3 obj_id = 2737659721646729968 -obj_tab = result_table[(result_table['object_id'] == obj_id)] +obj_row = result_table[(result_table['object_id'] == obj_id)] -obj_tab +obj_row ``` ### Pull the spectrum of this object @@ -203,9 +202,7 @@ obj_tab ```{code-cell} ipython3 adql_object = f"SELECT * FROM {table_1dspectra} WHERE objectid = {obj_id}" -result2 = Irsa.query_tap(adql_object) -df2 = result2.to_table().to_pandas() -df2 +result_table2 = Irsa.query_tap(adql_object).to_qtable() ``` ### The following steps to read in the spectrum follows the 3_Euclid_intro_1D_spectra notebook. @@ -213,44 +210,45 @@ df2 This involves reading in the spectrum without readin in the full FITS file, just pulling the extension we want. ```{code-cell} ipython3 -irsa_url = 'https://irsa.ipac.caltech.edu/' - -file_url = irsa_url + df2['uri'].iloc[0] -file_url - -response = requests.get(file_url) +file_uri = urllib.parse.urljoin(Irsa.tap_url, result_table2['uri'][0]) +file_uri +``` -with fits.open(BytesIO(response.content), memmap=True) as hdul: - hdu = hdul[df2['hdu'].iloc[0]] - dat = Table.read(hdu, format='fits', hdu=1) - df_obj_irsa = dat.to_pandas() +```{code-cell} ipython3 +with fits.open(file_uri) as hdul: + spectrum = QTable.read(hdul[result_table2['hdu'][0]], format='fits') + spec_header = hdul[result_table2['hdu'][0]].header ``` ### Now the data are read in, plot the spectrum with the H-alpha line labeled -Divide by 10000 to convert from Angstrom to micron +```{tip} +As we use astropy.visualization's ``quantity_support``, matplotlib automatically picks up the axis units from the quantitites we plot. +``` + +```{code-cell} ipython3 +quantity_support() +``` ```{code-cell} ipython3 -wavelengths = obj_tab['spe_line_central_wl_gf']/10000. -line_names = obj_tab['spe_line_name'] -snr_gf = obj_tab['spe_line_snr_gf'] +# Note that the units are missing from the lines table, we manually add Angstrom +line_wavelengths = obj_row['spe_line_central_wl_gf'] * u.angstrom +line_names = obj_row['spe_line_name'] +snr_gf = obj_row['spe_line_snr_gf'] -plt.plot(df_obj_irsa['WAVELENGTH']/10000., df_obj_irsa['SIGNAL']) +plt.plot(spectrum['WAVELENGTH'].to(u.micron), spectrum['SIGNAL']) -for wl, name, snr in zip(np.atleast_1d(wavelengths), np.atleast_1d(line_names), np.atleast_1d(snr_gf)): +for wl, name, snr in zip(np.atleast_1d(line_wavelengths), np.atleast_1d(line_names), np.atleast_1d(snr_gf)): plt.axvline(wl, color='b', linestyle='--', alpha=0.3) - plt.text(wl+0.02, .2, name+' SNR='+str(round(snr)), rotation=90, ha='center', va='bottom', fontsize=10) + plt.text(wl, .2, name+' SNR='+str(round(snr)), rotation=90, ha='center', va='bottom', fontsize=10) -plt.xlabel('Wavelength (microns)') -plt.ylabel('Flux (erg / (s cm2))') -plt.xlim(1.25, 1.85) -plt.title('Object ID is '+str(obj_id)) +plt.title(f'Object ID {obj_id}') ``` ## About this Notebook -**Author**: Tiffany Meshkat (IPAC Scientist) +**Author**: Tiffany Meshkat, Anahita Alavi, Anastasia Laity, Andreas Faisst, Brigitta Sipőcz, Dan Masters, Harry Teplitz, Jaladh Singhal, Shoubaneh Hemmati, Vandana Desai -**Updated**: 2025-03-19 +**Updated**: 2025-03-31 **Contact:** [the IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems.