Merge pull request #85 from bsipocz/euclid_spectra

bsipocz · web-flow · commit 171dc260a7f9 · 2025-03-31T19:38:40.000-07:00
Euclid: Simplify spectra notebook
diff --git a/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md b/tutorials/euclid_access/3_Euclid_intro_1D_spectra.md
@@ -47,126 +47,133 @@ If you have questions about it, please contact the [IRSA helpdesk](https://irsa.
 
 ```{code-cell} ipython3
 # Uncomment the next line to install dependencies if needed
-# !pip install matplotlib pandas requests astropy pyvo
+# !pip install matplotlib astropy 'astroquery>=0.4.10'
 ```
 
 ```{code-cell} ipython3
-from io import BytesIO
+import urllib
 
+import numpy as np
 import matplotlib.pyplot as plt
-import pandas as pd
-import requests
 
 from astropy.io import fits
-from astropy.table import Table
+from astropy.table import QTable
+from astropy import units as u
+from astropy.visualization import quantity_support
 
-import pyvo as vo
+from astroquery.ipac.irsa import Irsa
 ```
 
-## 1. Download 1D spectra from IRSA directly to this notebook
+## 1. Search for the spectrum of a specific galaxy
 
-Search for all tables in IRSA labeled as euclid
+First, explore what Euclid catalogs are available. Note that we need to use the object ID for our targets to be able to download their spectrum.
 
-```{code-cell} ipython3
-service = vo.dal.TAPService("https://irsa.ipac.caltech.edu/TAP")
+Search for all tables in IRSA labeled as "euclid".
 
-tables = service.tables
-for tablename in tables.keys():
-    if "tap_schema" not in tablename and "euclid" in tablename:
-            tables[tablename].describe()
+```{code-cell} ipython3
+Irsa.list_catalogs(filter='euclid')
 ```
 
 ```{code-cell} ipython3
-table_mer= 'euclid_q1_mer_catalogue'
-table_1dspectra= 'euclid.objectid_spectrafile_association_q1'
-table_phz= 'euclid_q1_phz_photo_z'
-table_galaxy_candidates= 'euclid_q1_spectro_zcatalog_spe_galaxy_candidates'
+table_1dspectra = 'euclid.objectid_spectrafile_association_q1'
 ```
 
+## 2. Search for the spectrum of a specific galaxy in the 1D spectra table
+
 ```{code-cell} ipython3
-## Change the settings so we can see all the columns in the dataframe and the full column width
-## (to see the full long URL)
-pd.set_option('display.max_columns', None)
-pd.set_option('display.max_colwidth', None)
+obj_id = 2689918641685825137
+```
+
+We will use TAP and an ASQL query to find the spectral data for our galaxy. (ADQL is the [IVOA Astronomical Data Query Language](https://www.ivoa.net/documents/latest/ADQL.html) and is based on SQL.)
 
+```{code-cell} ipython3
+adql_object = f"SELECT * FROM {table_1dspectra} WHERE objectid = {obj_id}"
 
-## Can use the following lines to reset the max columns and column width of pandas
-# pd.reset_option('display.max_columns')
-# pd.reset_option('display.max_colwidth')
+# Pull the data on this particular galaxy
+result = Irsa.query_tap(adql_object).to_table()
 ```
 
-## 2. Search for the spectrum of a specific galaxy in the 1D spectra table
+Pull out the file name from the ``result`` table:
 
 ```{code-cell} ipython3
-obj_id=2739401293646823742
+file_uri = urllib.parse.urljoin(Irsa.tap_url, result['uri'][0])
+file_uri
+```
+
+## 3. Read in the spectrum for only our specific object
 
-## Pull the data on these objects
-adql_object = f"SELECT * \
-FROM {table_1dspectra} \
-WHERE objectid = {obj_id} \
-AND uri IS NOT NULL "
+Currently IRSA has the spectra stored in very large files containing multiple (14220) extensions with spectra of many targets within one tile. You can choose to read in the big file below to see what it looks like (takes a few mins to load) or skip this step and just read in the specific extension we want for the 1D spectra (recommended).
 
-## Pull the data on this particular galaxy
-result2 = service.search(adql_object)
-df2=result2.to_table().to_pandas()
-df2
+```{code-cell} ipython3
+# hdul = fits.open(file_uri)
+# hdul.info()
 ```
 
-### Create the full filename/url
+Open the large FITS file without loading it entirely into memory, pulling out just the extension we want for the 1D spectra of our object
 
 ```{code-cell} ipython3
-irsa_url='https://irsa.ipac.caltech.edu/'
+with fits.open(file_uri) as hdul:
+    spectra = QTable.read(hdul[result['hdu'][0]], format='fits')
 
-file_url=irsa_url+df2['uri'].iloc[0]
-file_url
+    spec_header = hdul[result['hdu'][0]].header
 ```
 
-## 3. Read in the spectrum using the file_url and the extension just for this object
-
-Currently IRSA has the spectra stored in very large files containing multiple (14220) extensions with spectra of many targets within one tile. You can choose to read in the big file below to see what it looks like (takes a few mins to load) or skip this step and just read in the specific extension we want for the 1D spectra (recommended).
+```{code-cell} ipython3
+spectra
+```
 
 ```{code-cell} ipython3
-#### Code to read in the large file with many extensions and spectra from a tile
-#### Currently commented out
+spec_header
+```
 
-# ## Complete file url with the irsa url at the start
-# url = file_url
-# response = requests.get(url)
+## 4. Plot the image of the extracted spectrum
 
-# hdul = fits.open(BytesIO(response.content))  # Open FITS file from memory
-# hdul.info()  # Show file info
+```{tip}
+As we use astropy.visualization's ``quantity_support``, matplotlib automatically picks up the axis units from the quantitites we plot.
 ```
 
-### Open the large FITS file without loading it entirely into memory, pulling out just the extension we want for the 1D spectra of our object
-
 ```{code-cell} ipython3
-response = requests.get(file_url)
+quantity_support()
+```
+
+```{note}
+The 1D combined spectra table contains 6 columns, below are a few highlights:
 
-with fits.open(BytesIO(response.content), memmap=True) as hdul:
-    hdu = hdul[df2['hdu'].iloc[0]]
-    dat = Table.read(hdu, format='fits', hdu=1)
-    df_obj_irsa = dat.to_pandas()
+- WAVELENGTH is in Angstroms by default
+- SIGNAL is the flux and should be multiplied by the FSCALE factor in the header
+- MASK values can be used to determine which flux bins to discard. MASK = odd and MASK >=64 means the flux bins not be used.
 ```
 
-### Plot the image of the extracted spectrum
+```{code-cell} ipython3
+signal_scaled = spectra['SIGNAL'] * spec_header['FSCALE']
+```
+
+We investigate the MASK column to see which flux bins are recommended to keep vs "Do Not Use"
+
+```{code-cell} ipython3
+plt.plot(spectra['WAVELENGTH'].to(u.micron), spectra['MASK'])
+plt.ylabel('Mask value')
+plt.title('Values of MASK by flux bin')
+```
 
-- Convert the wavelength to microns
+We use the MASK column to create a boolean mask for values to ignore. We use the inverse of this mask to mark the flux bins to use.
 
 ```{code-cell} ipython3
-## Now the data are read in, show an image
+bad_mask = (spectra['MASK'].value % 2 == 1) | (spectra['MASK'].value >= 64)
 
-## Converting from Angstrom to microns
-plt.plot(df_obj_irsa['WAVELENGTH']/10000., df_obj_irsa['SIGNAL'])
+plt.plot(spectra['WAVELENGTH'].to(u.micron), np.ma.masked_where(bad_mask, signal_scaled), color='black', label='Spectrum')
+plt.plot(spectra['WAVELENGTH'], np.ma.masked_where(~bad_mask, signal_scaled), color='red', label='Do not use')
+plt.plot(spectra['WAVELENGTH'], np.sqrt(spectra['VAR']) * spec_header['FSCALE'], color='grey', label='Error')
 
-plt.xlabel('Wavelength (microns)')
-plt.ylabel('Flux'+dat['SIGNAL'].unit.to_string('latex_inline'))
-plt.title(obj_id)
+plt.legend(loc='upper right')
+plt.ylim(-0.15E-16, 0.25E-16)
+plt.title(f'Object ID {obj_id}')
 ```
 
 ## About this Notebook
 
-**Author**: Tiffany Meshkat (IPAC Scientist)
+**Author**: Tiffany Meshkat, Anahita Alavi, Anastasia Laity, Andreas Faisst, Brigitta Sipőcz, Dan Masters, Harry Teplitz, Jaladh Singhal, Shoubaneh Hemmati, Vandana Desai
 
-**Updated**: 2025-03-19
+**Updated**: 2025-03-31
 
 **Contact:** [the IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems.