diff --git a/.github/workflows/update_data.yml b/.github/workflows/update_data.yml
new file mode 100644
index 0000000..4e18620
--- /dev/null
+++ b/.github/workflows/update_data.yml
@@ -0,0 +1,26 @@
+name: Update Microsoft Stock Price Data
+
+on:
+  schedule:
+    # weekly: every Sunday at 00:00 UTC
+    - cron: "0 0 * * 0"
+  # support manual trigger
+  workflow_dispatch:
+
+jobs:
+  update_data:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+      - name: Update data
+        # load_data.py reads the service-account key JSON from BQ_LAB13.
+        # NOTE(review): assumes a repository secret named BQ_LAB13 - confirm.
+        env:
+          BQ_LAB13: ${{ secrets.BQ_LAB13 }}
+        run: python lab_13/load_data.py
\ No newline at end of file
diff --git a/lab_13/create_table.ipynb b/lab_13/create_table.ipynb
new file mode 100644
index 0000000..5e15fce
--- /dev/null
+++ b/lab_13/create_table.ipynb
@@ -0,0 +1,458 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5b781f40",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# and we will store the data in bigquery\n",
+ "import pandas_gbq\n",
+ "import pydata_google_auth\n",
+ "\n",
+ "import yfinance as yf # for downloading stock data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84cd426d",
+ "metadata": {},
+ "source": [
+ "### Authentication"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "8c0234f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# BigQuery authentication\n",
+ "SCOPES = [\n",
+ " 'https://www.googleapis.com/auth/cloud-platform',\n",
+ " 'https://www.googleapis.com/auth/drive',\n",
+ "]\n",
+ "\n",
+ "credentials = pydata_google_auth.get_user_credentials(\n",
+ " SCOPES,\n",
+ " # Note, this doesn't work if you're running from a notebook on a\n",
+ " # remote server, such as over SSH or with Google Colab. In those cases,\n",
+ " # install the gcloud command line interface and authenticate with the\n",
+ " # `gcloud auth application-default login` command and the `--no-browser`\n",
+ " # option.\n",
+ " auth_local_webserver=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f461dc3",
+ "metadata": {},
+ "source": [
+ "### Getting daily data from last month"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ada8c4db",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Open | \n",
+ " High | \n",
+ " Low | \n",
+ " Close | \n",
+ " Volume | \n",
+ " Dividends | \n",
+ " Stock Splits | \n",
+ "
\n",
+ " \n",
+ " Date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2025-03-18 00:00:00-04:00 | \n",
+ " 387.070007 | \n",
+ " 387.369995 | \n",
+ " 381.100006 | \n",
+ " 383.519989 | \n",
+ " 19486900 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-19 00:00:00-04:00 | \n",
+ " 385.529999 | \n",
+ " 389.679993 | \n",
+ " 384.000000 | \n",
+ " 387.820007 | \n",
+ " 19185500 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-20 00:00:00-04:00 | \n",
+ " 385.739990 | \n",
+ " 391.790009 | \n",
+ " 383.279999 | \n",
+ " 386.839996 | \n",
+ " 18470500 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-21 00:00:00-04:00 | \n",
+ " 383.220001 | \n",
+ " 391.739990 | \n",
+ " 382.799988 | \n",
+ " 391.260010 | \n",
+ " 39675900 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-24 00:00:00-04:00 | \n",
+ " 395.399994 | \n",
+ " 395.399994 | \n",
+ " 389.809998 | \n",
+ " 393.079987 | \n",
+ " 21004500 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-25 00:00:00-04:00 | \n",
+ " 393.920013 | \n",
+ " 396.359985 | \n",
+ " 392.640015 | \n",
+ " 395.160004 | \n",
+ " 15775000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-26 00:00:00-04:00 | \n",
+ " 395.000000 | \n",
+ " 395.309998 | \n",
+ " 388.570007 | \n",
+ " 389.970001 | \n",
+ " 16108400 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-27 00:00:00-04:00 | \n",
+ " 390.130005 | \n",
+ " 392.239990 | \n",
+ " 387.399994 | \n",
+ " 390.579987 | \n",
+ " 13766800 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-28 00:00:00-04:00 | \n",
+ " 388.079987 | \n",
+ " 389.130005 | \n",
+ " 376.929993 | \n",
+ " 378.799988 | \n",
+ " 21632000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-03-31 00:00:00-04:00 | \n",
+ " 372.540009 | \n",
+ " 377.070007 | \n",
+ " 367.239990 | \n",
+ " 375.390015 | \n",
+ " 35184700 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-01 00:00:00-04:00 | \n",
+ " 374.649994 | \n",
+ " 382.850006 | \n",
+ " 373.230011 | \n",
+ " 382.190002 | \n",
+ " 19689500 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-02 00:00:00-04:00 | \n",
+ " 377.970001 | \n",
+ " 385.079987 | \n",
+ " 376.619995 | \n",
+ " 382.140015 | \n",
+ " 16092600 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-03 00:00:00-04:00 | \n",
+ " 374.790009 | \n",
+ " 377.480011 | \n",
+ " 369.350006 | \n",
+ " 373.109985 | \n",
+ " 30198000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-04 00:00:00-04:00 | \n",
+ " 364.130005 | \n",
+ " 374.589996 | \n",
+ " 359.480011 | \n",
+ " 359.839996 | \n",
+ " 49209900 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-07 00:00:00-04:00 | \n",
+ " 350.880005 | \n",
+ " 371.000000 | \n",
+ " 344.790009 | \n",
+ " 357.859985 | \n",
+ " 50425000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-08 00:00:00-04:00 | \n",
+ " 368.260010 | \n",
+ " 373.649994 | \n",
+ " 350.250000 | \n",
+ " 354.559998 | \n",
+ " 35868900 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-09 00:00:00-04:00 | \n",
+ " 353.540009 | \n",
+ " 393.230011 | \n",
+ " 353.100006 | \n",
+ " 390.489990 | \n",
+ " 50199700 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-10 00:00:00-04:00 | \n",
+ " 382.059998 | \n",
+ " 383.899994 | \n",
+ " 367.799988 | \n",
+ " 381.350006 | \n",
+ " 38024400 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-11 00:00:00-04:00 | \n",
+ " 380.640015 | \n",
+ " 390.049988 | \n",
+ " 378.890015 | \n",
+ " 388.450012 | \n",
+ " 23839200 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-14 00:00:00-04:00 | \n",
+ " 393.220001 | \n",
+ " 394.649994 | \n",
+ " 384.209991 | \n",
+ " 387.809998 | \n",
+ " 19251200 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-15 00:00:00-04:00 | \n",
+ " 388.510010 | \n",
+ " 391.890015 | \n",
+ " 384.160004 | \n",
+ " 385.730011 | \n",
+ " 17199900 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-16 00:00:00-04:00 | \n",
+ " 380.670013 | \n",
+ " 381.609985 | \n",
+ " 368.000000 | \n",
+ " 371.609985 | \n",
+ " 21967800 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2025-04-17 00:00:00-04:00 | \n",
+ " 373.440002 | \n",
+ " 374.321503 | \n",
+ " 366.890015 | \n",
+ " 367.779999 | \n",
+ " 20858907 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Open High Low Close \\\n",
+ "Date \n",
+ "2025-03-18 00:00:00-04:00 387.070007 387.369995 381.100006 383.519989 \n",
+ "2025-03-19 00:00:00-04:00 385.529999 389.679993 384.000000 387.820007 \n",
+ "2025-03-20 00:00:00-04:00 385.739990 391.790009 383.279999 386.839996 \n",
+ "2025-03-21 00:00:00-04:00 383.220001 391.739990 382.799988 391.260010 \n",
+ "2025-03-24 00:00:00-04:00 395.399994 395.399994 389.809998 393.079987 \n",
+ "2025-03-25 00:00:00-04:00 393.920013 396.359985 392.640015 395.160004 \n",
+ "2025-03-26 00:00:00-04:00 395.000000 395.309998 388.570007 389.970001 \n",
+ "2025-03-27 00:00:00-04:00 390.130005 392.239990 387.399994 390.579987 \n",
+ "2025-03-28 00:00:00-04:00 388.079987 389.130005 376.929993 378.799988 \n",
+ "2025-03-31 00:00:00-04:00 372.540009 377.070007 367.239990 375.390015 \n",
+ "2025-04-01 00:00:00-04:00 374.649994 382.850006 373.230011 382.190002 \n",
+ "2025-04-02 00:00:00-04:00 377.970001 385.079987 376.619995 382.140015 \n",
+ "2025-04-03 00:00:00-04:00 374.790009 377.480011 369.350006 373.109985 \n",
+ "2025-04-04 00:00:00-04:00 364.130005 374.589996 359.480011 359.839996 \n",
+ "2025-04-07 00:00:00-04:00 350.880005 371.000000 344.790009 357.859985 \n",
+ "2025-04-08 00:00:00-04:00 368.260010 373.649994 350.250000 354.559998 \n",
+ "2025-04-09 00:00:00-04:00 353.540009 393.230011 353.100006 390.489990 \n",
+ "2025-04-10 00:00:00-04:00 382.059998 383.899994 367.799988 381.350006 \n",
+ "2025-04-11 00:00:00-04:00 380.640015 390.049988 378.890015 388.450012 \n",
+ "2025-04-14 00:00:00-04:00 393.220001 394.649994 384.209991 387.809998 \n",
+ "2025-04-15 00:00:00-04:00 388.510010 391.890015 384.160004 385.730011 \n",
+ "2025-04-16 00:00:00-04:00 380.670013 381.609985 368.000000 371.609985 \n",
+ "2025-04-17 00:00:00-04:00 373.440002 374.321503 366.890015 367.779999 \n",
+ "\n",
+ " Volume Dividends Stock Splits \n",
+ "Date \n",
+ "2025-03-18 00:00:00-04:00 19486900 0.0 0.0 \n",
+ "2025-03-19 00:00:00-04:00 19185500 0.0 0.0 \n",
+ "2025-03-20 00:00:00-04:00 18470500 0.0 0.0 \n",
+ "2025-03-21 00:00:00-04:00 39675900 0.0 0.0 \n",
+ "2025-03-24 00:00:00-04:00 21004500 0.0 0.0 \n",
+ "2025-03-25 00:00:00-04:00 15775000 0.0 0.0 \n",
+ "2025-03-26 00:00:00-04:00 16108400 0.0 0.0 \n",
+ "2025-03-27 00:00:00-04:00 13766800 0.0 0.0 \n",
+ "2025-03-28 00:00:00-04:00 21632000 0.0 0.0 \n",
+ "2025-03-31 00:00:00-04:00 35184700 0.0 0.0 \n",
+ "2025-04-01 00:00:00-04:00 19689500 0.0 0.0 \n",
+ "2025-04-02 00:00:00-04:00 16092600 0.0 0.0 \n",
+ "2025-04-03 00:00:00-04:00 30198000 0.0 0.0 \n",
+ "2025-04-04 00:00:00-04:00 49209900 0.0 0.0 \n",
+ "2025-04-07 00:00:00-04:00 50425000 0.0 0.0 \n",
+ "2025-04-08 00:00:00-04:00 35868900 0.0 0.0 \n",
+ "2025-04-09 00:00:00-04:00 50199700 0.0 0.0 \n",
+ "2025-04-10 00:00:00-04:00 38024400 0.0 0.0 \n",
+ "2025-04-11 00:00:00-04:00 23839200 0.0 0.0 \n",
+ "2025-04-14 00:00:00-04:00 19251200 0.0 0.0 \n",
+ "2025-04-15 00:00:00-04:00 17199900 0.0 0.0 \n",
+ "2025-04-16 00:00:00-04:00 21967800 0.0 0.0 \n",
+ "2025-04-17 00:00:00-04:00 20858907 0.0 0.0 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dat = yf.Ticker(\"MSFT\")\n",
+ "msft_df = dat.history(period='1mo')\n",
+ "msft_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e9c0dd1c",
+ "metadata": {},
+ "source": [
+ "## Creating BigQuery Table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "180c6f83",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 874.00it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# to BigQuery\n",
+ "pandas_gbq.to_gbq(\n",
+ " msft_df,\n",
+ " destination_table='stock_data.msft',\n",
+ " project_id='sipa-adv-c-roberto',\n",
+ " if_exists='replace',\n",
+ " credentials=credentials,\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lab_13/load_data.py b/lab_13/load_data.py
new file mode 100644
index 0000000..49ebfe9
--- /dev/null
+++ b/lab_13/load_data.py
@@ -0,0 +1,63 @@
+# Price data is stored in BigQuery (read and written through pandas_gbq)
+import pandas_gbq
+import pydata_google_auth
+from google.oauth2 import service_account
+import os
+
+import yfinance as yf # for downloading stock data
+
+def get_price_data():
+ dat = yf.Ticker("MSFT")
+ msft_df = dat.history(period='1mo')
+ return msft_df
+
+def get_bq_credentials():
+ # Load the data from BigQuery
+ SCOPES = [
+ 'https://www.googleapis.com/auth/cloud-platform',
+ 'https://www.googleapis.com/auth/drive',
+ ]
+
+ # getting the credentials from the environment variable
+ bq_credentials = os.environ.get('BQ_LAB13')
+ # as json file
+ credentials = service_account.Credentials.from_service_account_info(
+ bq_credentials,
+ scopes=SCOPES
+ )
+ return credentials
+
+def get_bq_data():
+
+ # Load the data from BigQuery into a DataFrame
+ query = "SELECT * FROM `stock_data.msft`"
+
+ # getting the credentials
+ credentials = get_bq_credentials()
+
+ df = pandas_gbq.read_gbq(query, project_id='sipa-adv-c-roberto', credentials=credentials)
+
+ return df
+
+def update_data():
+ # get the data from yfinance
+ msft_df = get_price_data()
+ # get the data from bigquery
+ bq_df = get_bq_data()
+
+ # comparing latest date from bq and msft_df
+ bq_latest_date = bq_df['Date'].max()
+ msft_latest_date = msft_df.index.max()
+ # if the latest date from msft_df is greater than bq_latest_date, we add new data to bq
+ if msft_latest_date > bq_latest_date:
+ # get the new data from msft_df
+ new_data = msft_df[msft_df.index > bq_latest_date]
+ # add the new data to bq
+ pandas_gbq.to_gbq(new_data, 'stock_data.msft', project_id='sipa-adv-c-roberto', if_exists='append')
+ print("Data updated")
+ else:
+ print("No new data")
+
+if __name__ == "__main__":
+ # Entry point when the file is run as a script: sync BigQuery with yfinance
+ update_data()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 112f537..99358d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,7 @@ plotly
matplotlib
python-dotenv
pytest
-duckdb
\ No newline at end of file
+duckdb
+google-cloud-bigquery
+pandas-gbq
+yfinance
\ No newline at end of file