From f305309df2b3ade1986646b83ad30d64373c5570 Mon Sep 17 00:00:00 2001 From: Gregor Decristoforo Date: Tue, 19 Nov 2024 11:00:19 +0100 Subject: [PATCH 1/2] Update exercise 2 in xarray lecture --- content/xarray.rst | 75 +++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/content/xarray.rst b/content/xarray.rst index a49b9ab6..17292c8a 100644 --- a/content/xarray.rst +++ b/content/xarray.rst @@ -380,7 +380,16 @@ Exercises 2 .. challenge:: Exercises: Xarray-2 - Let's change from climate science to finance for this example. Put the stock prices and trading volumes of three companies over ten days in one dataset. Create an Xarray Dataset that uses time and company as dimensions and contains two DataArrays: ``stock_price`` and ``trading_volume``. You can choose the values for the stock prices and trading volumes yourself. As a last thing, add the currency of the stock prices as an attribute to the Dataset. + Let's change from climate science to finance for this example. Put the stock prices and trading volumes of three companies in one dataset. Create an Xarray Dataset that uses time and company as dimensions and contains two DataArrays: ``stock_price`` and ``trading_volume``. You can download the data as pandas DataFrames (one per company) with the following code: :: + + import yfinance as yf + + AAPL_df = yf.download("AAPL", start="2020-01-01", end="2024-01-01") + GOOGL_df = yf.download("GOOGL", start="2020-01-01", end="2024-01-01") + MSFT_df = yf.download("MSFT", start="2020-01-01", end="2024-01-01") + + + Finally, add the currency of the stock prices as an attribute to the Dataset. .. 
solution:: Solutions: Xarray-2 @@ -388,46 +397,44 @@ Exercises 2 import xarray as xr import numpy as np + import yfinance as yf + + start_date = "2020-01-01" + end_date = "2024-01-01" + + AAPL_df = yf.download("AAPL", start=start_date, end=end_date) + GOOGL_df = yf.download("GOOGL", start=start_date, end=end_date) + MSFT_df = yf.download("MSFT", start=start_date, end=end_date) + + + stock_prices = np.array( + [ + AAPL_df["Close"].values, + GOOGL_df["Close"].values, + MSFT_df["Close"].values, + ] + ) + + trading_volumes = np.array( + [ + AAPL_df["Volume"].values, + GOOGL_df["Volume"].values, + MSFT_df["Volume"].values, + ] + ) + - time = [ - "2023-01-01", - "2023-01-02", - "2023-01-03", - "2023-01-04", - "2023-01-05", - "2023-01-06", - "2023-01-07", - "2023-01-08", - "2023-01-09", - "2023-01-10", - ] companies = ["AAPL", "GOOGL", "MSFT"] - stock_prices = np.random.normal(loc=[100, 1500, 200], scale=[10, 50, 20], size=(10, 3)) - trading_volumes = np.random.randint(1000, 10000, size=(10, 3)) + time = AAPL_df.index[:].strftime("%Y-%m-%d").tolist() + ds = xr.Dataset( - data_vars = { - "stock_price": (["time", "company"], stock_prices), - "trading_volume": (["time", "company"], trading_volumes), + { + "stock_price": (["company", "time"], stock_prices[:, :, 0]), + "trading_volume": (["company", "time"], trading_volumes[:, :, 0]), }, coords={"time": time, "company": companies}, attrs={"currency": "USD"}, ) - print(ds) - - The output should then resemble this: :: - - > python exercise.py - Size: 940B - Dimensions: (time: 10, company: 3) - Coordinates: - * time (time) Date: Wed, 20 Nov 2024 08:00:04 +0100 Subject: [PATCH 2/2] Add yfinance to environment.yml --- software/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/software/environment.yml b/software/environment.yml index 66457722..937e9cca 100644 --- a/software/environment.yml +++ b/software/environment.yml @@ -24,6 +24,7 @@ dependencies: - vega_datasets - xarray - netcdf4 + - yfinance - pip - pip: - 
pythia_datasets