diff --git a/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md b/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md index 6c310f43..b7fadba3 100644 --- a/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md +++ b/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md @@ -47,126 +47,152 @@ If you have questions about it, please contact the [IRSA helpdesk](https://irsa. ```{code-cell} ipython3 # Uncomment the next line to install dependencies if needed -# !pip install matplotlib pandas requests astropy pyvo +# !pip install matplotlib astropy 'astroquery>=0.4.10' ``` ```{code-cell} ipython3 -from io import BytesIO - +import urllib import matplotlib.pyplot as plt -import pandas as pd -import requests +from matplotlib.lines import Line2D +import numpy as np from astropy.io import fits -from astropy.table import Table +from astropy.table import QTable +from astropy.visualization import quantity_support -import pyvo as vo +from astroquery.ipac.irsa import Irsa ``` -## 1. Download 1D spectra from IRSA directly to this notebook - -Search for all tables in IRSA labeled as euclid +## 1. Search for the spectrum of a specific galaxy -```{code-cell} ipython3 -service = vo.dal.TAPService("https://irsa.ipac.caltech.edu/TAP") +First, explore what Euclid catalogs are available. Note that we need to use the object ID for our targets to be able to download their spectrum. -tables = service.tables -for tablename in tables.keys(): - if "tap_schema" not in tablename and "euclid" in tablename: - tables[tablename].describe() -``` +Search for all tables in IRSA labeled as "euclid". ```{code-cell} ipython3 -table_mer= 'euclid_q1_mer_catalogue' -table_1dspectra= 'euclid.objectid_spectrafile_association_q1' -table_phz= 'euclid_q1_phz_photo_z' -table_galaxy_candidates= 'euclid_q1_spectro_zcatalog_spe_galaxy_candidates' +Irsa.list_catalogs(filter='euclid') ``` ```{code-cell} ipython3 -## Change the settings so we can see all the columns in the dataframe and the full column width -## (to see the full long URL) -pd.set_option('display.max_columns', None) -pd.set_option('display.max_colwidth', None) - - -## Can use the following lines to reset the max columns and column width of pandas -# pd.reset_option('display.max_columns') -# pd.reset_option('display.max_colwidth') +table_1dspectra = 'euclid.objectid_spectrafile_association_q1' ``` ## 2. Search for the spectrum of a specific galaxy in the 1D spectra table ```{code-cell} ipython3 -obj_id=2739401293646823742 - -## Pull the data on these objects -adql_object = f"SELECT * \ -FROM {table_1dspectra} \ -WHERE objectid = {obj_id} \ -AND uri IS NOT NULL " - -## Pull the data on this particular galaxy -result2 = service.search(adql_object) -df2=result2.to_table().to_pandas() -df2 +obj_id = 2689918641685825137 ``` -### Create the full filename/url +We will use TAP and an ASQL query to find the spectral data for our galaxy. (ADQL is the [IVOA Astronomical Data Query Language](https://www.ivoa.net/documents/latest/ADQL.html) and is based on SQL.) ```{code-cell} ipython3 -irsa_url='https://irsa.ipac.caltech.edu/' +adql_object = f"SELECT * FROM {table_1dspectra} WHERE objectid = {obj_id}" -file_url=irsa_url+df2['uri'].iloc[0] -file_url +# Pull the data on this particular galaxy +result = Irsa.query_tap(adql_object).to_table() ``` -## 3. Read in the spectrum using the file_url and the extension just for this object +Pull out the file name from the ``result`` table: + +```{code-cell} ipython3 +file_uri = urllib.parse.urljoin(Irsa.tap_url, result['uri'][0]) +file_uri +``` + +## 3. Read in the spectrum for only our specific object Currently IRSA has the spectra stored in very large files containing multiple (14220) extensions with spectra of many targets within one tile. You can choose to read in the big file below to see what it looks like (takes a few mins to load) or skip this step and just read in the specific extension we want for the 1D spectra (recommended). ```{code-cell} ipython3 -#### Code to read in the large file with many extensions and spectra from a tile -#### Currently commented out +# hdul = fits.open(file_uri) +# hdul.info() +``` -# ## Complete file url with the irsa url at the start -# url = file_url -# response = requests.get(url) +Open the large FITS file without loading it entirely into memory, pulling out just the extension we want for the 1D spectra of our object -# hdul = fits.open(BytesIO(response.content)) # Open FITS file from memory -# hdul.info() # Show file info +```{code-cell} ipython3 +with fits.open(file_uri) as hdul: + spectra = QTable.read(hdul[result['hdu'][0]], format='fits') + + spec_header = hdul[result['hdu'][0]].header +``` + +```{code-cell} ipython3 +spectra ``` -### Open the large FITS file without loading it entirely into memory, pulling out just the extension we want for the 1D spectra of our object +The 1D combined spectra table contains 6 columns, below are a few highlights: + +- WAVELENGTH is in Angstroms by default +- SIGNAL is the flux and should be multiplied by the FSCALE factor in the header +- MASK values can be used to determine which flux bins to discard. MASK = odd and MASK >=64 means the flux bins not be used. + ++++ + +We investigate the MASK column to see which flux bins are recommended to keep vs "Do Not Use" ```{code-cell} ipython3 -response = requests.get(file_url) +plt.plot(spectra['WAVELENGTH']/10000., spectra['MASK']) +plt.xlabel('Wavelength ($\mu$m)') +plt.ylabel('Mask value') +plt.title('Values of MASK by flux bin') +``` -with fits.open(BytesIO(response.content), memmap=True) as hdul: - hdu = hdul[df2['hdu'].iloc[0]] - dat = Table.read(hdu, format='fits', hdu=1) - df_obj_irsa = dat.to_pandas() +```{code-cell} ipython3 +spec_header ``` -### Plot the image of the extracted spectrum +## 4. Plot the image of the extracted spectrum + +```{tip} +As we use astropy.visualization's ``quantity_support``, matplotlib automatically picks up the axis units from the quantitites we plot. +``` - Convert the wavelength to microns +- Multiple the signal by FSCALE from the header +- Use the MASK column to discard any values where MASK = odd or >=64 +- Use the VAR column (variance) to plot the error on the data ```{code-cell} ipython3 -## Now the data are read in, show an image +wavelength = spectra['WAVELENGTH'] / 10000. +signal_scaled = spectra['SIGNAL'] * spec_header['FSCALE'] + +## Use the MASK column to create a "good mask", just the flux bins to use +bad_mask = (spectra['MASK'].value % 2 == 1) | (spectra['MASK'].value >= 64) +good_mask = ~bad_mask + +## Plot the spectrum in black for the good flux bins and in red for the masked (do not use) flux bins. +for i in range(1, len(wavelength)): + ## Plot good data (black) + if good_mask[i]: + plt.plot(wavelength[i-1:i+1], signal_scaled[i-1:i+1], color='black') + ## Plot bad data (red) + elif bad_mask[i]: + plt.plot(wavelength[i-1:i+1], signal_scaled[i-1:i+1], color='red') + + +plt.plot(wavelength, np.sqrt(spectra['VAR']) * spec_header['FSCALE'], color='grey', label='Error') + +## Add the line names to the legend +spectrum_line = Line2D([0], [0], color='black', lw=2, label='Spectrum') +bad_line = Line2D([0], [0], color='red', lw=2, label='Do Not Use') +error_line = Line2D([0], [0], color='grey', lw=2, label='Error') +plt.legend(handles=[spectrum_line, bad_line,error_line], loc='upper right') + -## Converting from Angstrom to microns -plt.plot(df_obj_irsa['WAVELENGTH']/10000., df_obj_irsa['SIGNAL']) +plt.xlabel('Wavelength ($\mu$m)') +plt.ylabel('Flux ($\mathrm{erg\,\mu m^{-1}\,s^{-1}\,cm^{-2}}$)') +plt.ylim(-0.15E-16, 0.25E-16) +# plt.xlim(1.25,1.85) +plt.title('Object ID is ' + str(obj_id)) -plt.xlabel('Wavelength (microns)') -plt.ylabel('Flux'+dat['SIGNAL'].unit.to_string('latex_inline')) -plt.title(obj_id) +plt.show() ``` ## About this Notebook -**Author**: Tiffany Meshkat (IPAC Scientist) +**Author**: Tiffany Meshkat, Anahita Alavi, Anastasia Laity, Andreas Faisst, Brigitta Sipőcz, Dan Masters, Harry Teplitz, Jaladh Singhal, Shoubaneh Hemmati, Vandana Desai -**Updated**: 2025-03-19 +**Updated**: 2025-03-28 **Contact:** [the IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems.