From c6aea1744dbec6f57e2d07efd2e7688fc3011358 Mon Sep 17 00:00:00 2001
From: kuds
Date: Sat, 30 Aug 2025 12:15:04 -0500
Subject: [PATCH 01/29] Remove alpaca-trade-api from pyproject.toml

Removed 'alpaca-trade-api' from dependencies.
---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4b133d53fa..18fbdfda90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,6 @@ github = "https://github.com/finrl/finrl-library"
 [tool.poetry.dependencies]
 python = "^3.7"
 elegantrl = {git="https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl"}
-alpaca-trade-api = "^3"
 ccxt = "^3"
 jqdatasdk = "^1"
 pyfolio-reloaded = "^0.9"

From b3a29e3a4a383de27fbc0a8cde12d0235978b5f1 Mon Sep 17 00:00:00 2001
From: kuds
Date: Sat, 30 Aug 2025 12:23:15 -0500
Subject: [PATCH 02/29] Replace 'alpaca_trade_api' with 'alpaca' import

---
 finrl/meta/env_stock_trading/env_stock_papertrading.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finrl/meta/env_stock_trading/env_stock_papertrading.py b/finrl/meta/env_stock_trading/env_stock_papertrading.py
index db795b9811..7c6a4b0bf3 100644
--- a/finrl/meta/env_stock_trading/env_stock_papertrading.py
+++ b/finrl/meta/env_stock_trading/env_stock_papertrading.py
@@ -4,7 +4,7 @@
 import threading
 import time
 
-import alpaca_trade_api as tradeapi
+import alpaca as tradeapi
 import gymnasium as gym
 import numpy as np
 import pandas as pd

From 1d41361afbae6fac9b5574426c811bfd9f0d8cfe Mon Sep 17 00:00:00 2001
From: kuds
Date: Sat, 30 Aug 2025 12:32:04 -0500
Subject: [PATCH 03/29] Remove alpaca_trade_api from requirements.txt

Removed alpaca_trade_api dependency from requirements.
---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8220f94c46..cdd7fc4682 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 alpaca-py
-alpaca_trade_api>=2.1.0
 ccxt>=1.66.32
 
 elegantrl

From 949d3d61c1ca1d6c1fd8ff22cee87cb12b59f87a Mon Sep 17 00:00:00 2001
From: kuds
Date: Sat, 30 Aug 2025 12:38:25 -0500
Subject: [PATCH 04/29] Change import from 'alpaca_trade_api' to 'alpaca'

---
 finrl/meta/paper_trading/alpaca.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index e614f6e810..cfec2e3af6 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -6,7 +6,7 @@
 import threading
 import time
 
-import alpaca_trade_api as tradeapi
+import alpaca as tradeapi
 import gym
 import numpy as np
 import pandas as pd

From e8a95c15a9244488d1d5d472f5b129ca99926e66 Mon Sep 17 00:00:00 2001
From: kuds
Date: Sat, 30 Aug 2025 13:04:39 -0500
Subject: [PATCH 05/29] Change import from alpaca_trade_api to alpaca

---
 finrl/meta/paper_trading/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finrl/meta/paper_trading/common.py b/finrl/meta/paper_trading/common.py
index aae4c52220..b4d0ade751 100644
--- a/finrl/meta/paper_trading/common.py
+++ b/finrl/meta/paper_trading/common.py
@@ -798,7 +798,7 @@ def test(
 
 
 # -----------------------------------------------------------------------------------------------------------------------------------------
-import alpaca_trade_api as tradeapi
+import alpaca as tradeapi
 import pandas_market_calendars as tc
 import numpy as np
 import pandas as pd

From d5820acbe15982177cc0a882ac0beeb0f46ecb31 Mon Sep 17 00:00:00 2001
From: kuds
Date: Sat, 30 Aug 2025 13:05:32 -0500
Subject: [PATCH 06/29] Clean up imports in processor_alpaca.py

Removed unused import statements for alpaca_trade_api.
---
 finrl/meta/data_processors/processor_alpaca.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py
index 0a82100073..c7af744924 100644
--- a/finrl/meta/data_processors/processor_alpaca.py
+++ b/finrl/meta/data_processors/processor_alpaca.py
@@ -14,9 +14,6 @@
 from alpaca.data.timeframe import TimeFrame
 from stockstats import StockDataFrame as Sdf
 
-# import alpaca_trade_api as tradeapi
-
-
 class AlpacaProcessor:
     def __init__(self, API_KEY=None, API_SECRET=None, API_BASE_URL=None, client=None):
         if client is None:

From c99b9fb68460ff46b02a9cf9e07a3497954d4bd9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 30 Aug 2025 18:05:43 +0000
Subject: [PATCH 07/29] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 finrl/meta/data_processors/processor_alpaca.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py
index c7af744924..34018ec7fa 100644
--- a/finrl/meta/data_processors/processor_alpaca.py
+++ b/finrl/meta/data_processors/processor_alpaca.py
@@ -14,6 +14,7 @@
 from alpaca.data.timeframe import TimeFrame
 from stockstats import StockDataFrame as Sdf
 
+
 class AlpacaProcessor:
     def __init__(self, API_KEY=None, API_SECRET=None, API_BASE_URL=None, client=None):
         if client is None:

From 1e46a22b5de2eb233bd5f98e9fb1a9076c1f105e Mon Sep 17 00:00:00 2001
From: kuds
Date: Fri, 5 Sep 2025 12:20:19 -0500
Subject: [PATCH 08/29] Clean up package installation commands in notebook

---
 examples/Stock_NeurIPS2018_SB3.ipynb | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/examples/Stock_NeurIPS2018_SB3.ipynb b/examples/Stock_NeurIPS2018_SB3.ipynb
index 903e196d05..48f0e16695 100644
--- a/examples/Stock_NeurIPS2018_SB3.ipynb
+++ b/examples/Stock_NeurIPS2018_SB3.ipynb
@@ -224,15 +224,6 @@
    ],
    "source": [
     "## install required packages\n",
-    "\n",
-    "!pip install swig\n",
-    "!pip install wrds\n",
-    "!pip install pyportfolioopt\n",
-    "## install finrl library\n",
-    "!pip install -q condacolab\n",
-    "import condacolab\n",
-    "condacolab.install()\n",
-    "!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig\n",
     "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git"
    ]
   },

From decdadd966d79432fbebf567fb2b85ed1f5158fc Mon Sep 17 00:00:00 2001
From: kuds
Date: Mon, 8 Sep 2025 12:34:05 -0500
Subject: [PATCH 09/29] Clean up installation commands in demo notebook

Removed unnecessary installation commands for wrds, swig, and condacolab.
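
With these removals the notebook's setup is a single pip install. A quick
way to confirm the slimmed-down bootstrap still works (a minimal sketch;
the import check is illustrative and not part of the notebook itself):

    !pip install git+https://github.com/AI4Finance-Foundation/FinRL.git
    # the package should import without the wrds/swig/condacolab steps
    import finrl
    print(finrl.__file__)  # sanity check that the install resolved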
---
 examples/FinRL_PaperTrading_Demo.ipynb | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb
index f54a01b7f3..5b20b41cda 100644
--- a/examples/FinRL_PaperTrading_Demo.ipynb
+++ b/examples/FinRL_PaperTrading_Demo.ipynb
@@ -34,12 +34,6 @@
    "outputs": [],
    "source": [
     "## install finrl library\n",
-    "!pip install wrds\n",
-    "!pip install swig\n",
-    "!pip install -q condacolab\n",
-    "import condacolab\n",
-    "condacolab.install()\n",
-    "!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig\n",
     "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git\n"
    ]
   },

From d810063fcefcd871a13f52a96a399984f45981c3 Mon Sep 17 00:00:00 2001
From: kuds
Date: Mon, 8 Sep 2025 17:01:00 -0500
Subject: [PATCH 10/29] Update Alpaca API integration to use TradingClient

---
 finrl/meta/paper_trading/alpaca.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index cfec2e3af6..c7c70290f6 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -6,8 +6,9 @@
 import threading
 import time
 
-import alpaca as tradeapi
-import gym
+import alpaca
+from alpaca.trading.client import TradingClient
+import gymnasium as gym
 import numpy as np
 import pandas as pd
 import torch
@@ -94,7 +95,7 @@ def __init__(
         # connect to Alpaca trading API
         try:
-            self.alpaca = tradeapi.REST(API_KEY, API_SECRET, API_BASE_URL, "v2")
+            self.alpaca = TradingClient(api_key=API_KEY, secret_key=API_SECRET,paper=True)
         except:
             raise ValueError(
                 "Fail to connect Alpaca. Please check account info and internet connection."
             )
@@ -168,7 +169,7 @@ def run(self):
             print("Market closing soon. Closing positions.")
 
             threads = []
-            positions = self.alpaca.list_positions()
+            positions = self.alpaca.get_all_positions()
             for position in positions:
                 if position.side == "long":
                     orderSide = "sell"
@@ -280,7 +281,7 @@
 
         else:  # sell all when turbulence
             threads = []
-            positions = self.alpaca.list_positions()
+            positions = self.alpaca.get_all_positions()
             for position in positions:
                 if position.side == "long":
                     orderSide = "sell"
@@ -313,7 +314,7 @@ def get_state(self):
         ).astype(np.float32)
         tech = tech * 2**-7
 
-        positions = self.alpaca.list_positions()
+        positions = self.alpaca.get_all_positions()
         stocks = [0] * len(self.stockUniverse)
         for position in positions:
             ind = self.stockUniverse.index(position.symbol)

From d73afd76a4f70920dfea9deada0e6b4a5246949b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 8 Sep 2025 22:01:11 +0000
Subject: [PATCH 11/29] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 finrl/meta/paper_trading/alpaca.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index c7c70290f6..1826ad2553 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -7,11 +7,11 @@
 import time
 
 import alpaca
-from alpaca.trading.client import TradingClient
 import gymnasium as gym
 import numpy as np
 import pandas as pd
 import torch
+from alpaca.trading.client import TradingClient
 
 from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor
 from finrl.meta.paper_trading.common import AgentPPO
@@ -95,7 +95,9 @@ def __init__(
         # connect to Alpaca trading API
         try:
-            self.alpaca = TradingClient(api_key=API_KEY, secret_key=API_SECRET,paper=True)
+            self.alpaca = TradingClient(
+                api_key=API_KEY, secret_key=API_SECRET, paper=True
+            )
         except:
             raise ValueError(
                 "Fail to connect Alpaca. Please check account info and internet connection."
             )

From 47805eb8d0042b4d95dd180479819ccd64b824ec Mon Sep 17 00:00:00 2001
From: kuds
Date: Wed, 10 Sep 2025 16:09:32 -0500
Subject: [PATCH 12/29] Update Colab link in PaperTrading demo notebook

---
 examples/FinRL_PaperTrading_Demo.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb
index 5b20b41cda..c759d784d7 100644
--- a/examples/FinRL_PaperTrading_Demo.ipynb
+++ b/examples/FinRL_PaperTrading_Demo.ipynb
@@ -13,7 +13,7 @@
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-   "\"Open"
+   "\"Open"
  ]
 },

From a65da260872142a013fcceffd79a6f412b87ac4a Mon Sep 17 00:00:00 2001
From: kuds
Date: Wed, 10 Sep 2025 19:01:42 -0500
Subject: [PATCH 13/29] Remove unused alpaca import from alpaca.py

Removed unused import of the alpaca module.
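
With the module import gone, all Alpaca access now goes through the typed
alpaca-py classes. For context, the migrated connect-and-cancel flow looks
roughly like the sketch below (assuming paper-trading credentials in
API_KEY/API_SECRET; note that alpaca-py's TradingClient cancels a single
order via cancel_order_by_id rather than the old SDK's cancel_order):

    from alpaca.trading.client import TradingClient
    from alpaca.trading.enums import QueryOrderStatus
    from alpaca.trading.requests import GetOrdersRequest

    # paper=True routes every request to the paper-trading endpoint
    client = TradingClient(api_key=API_KEY, secret_key=API_SECRET, paper=True)

    # fetch all open orders, then cancel each one by id
    request_params = GetOrdersRequest(status=QueryOrderStatus.OPEN)
    for order in client.get_orders(filter=request_params):
        client.cancel_order_by_id(order.id)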
---
 finrl/meta/paper_trading/alpaca.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index 1826ad2553..18980ba5c9 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -6,7 +6,6 @@
 import threading
 import time
 
-import alpaca
 import gymnasium as gym
 import numpy as np
 import pandas as pd

From 2f1aced4e12379b27a0c3be61c19de159a3e35a7 Mon Sep 17 00:00:00 2001
From: kuds
Date: Wed, 10 Sep 2025 19:26:05 -0500
Subject: [PATCH 14/29] Update order retrieval to use new request method

---
 finrl/meta/paper_trading/alpaca.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index 18980ba5c9..0f4454132c 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -11,6 +11,8 @@
 import pandas as pd
 import torch
 from alpaca.trading.client import TradingClient
+from alpaca.trading.requests import GetOrdersRequest
+from alpaca.trading.enums import OrderSide, QueryOrderStatus
 
 from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor
 from finrl.meta.paper_trading.common import AgentPPO
@@ -149,7 +151,13 @@ def test_latency(self, test_times=10):
         return latency
 
     def run(self):
-        orders = self.alpaca.list_orders(status="open")
+        # params to filter orders by
+        request_params = GetOrdersRequest(
+            status=QueryOrderStatus.OPEN
+        )
+
+        # orders that satisfy params
+        orders = self.alpaca.get_orders(filter=request_params)
         for order in orders:
             self.alpaca.cancel_order(order.id)

From 01548f1df6a905dd9941df7d7f93bf3e77d3a430 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Sep 2025 00:26:16 +0000
Subject: [PATCH 15/29] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 finrl/meta/paper_trading/alpaca.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index 0f4454132c..68ffe58b3b 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -11,8 +11,9 @@
 import pandas as pd
 import torch
 from alpaca.trading.client import TradingClient
+from alpaca.trading.enums import OrderSide
+from alpaca.trading.enums import QueryOrderStatus
 from alpaca.trading.requests import GetOrdersRequest
-from alpaca.trading.enums import OrderSide, QueryOrderStatus
 
 from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor
 from finrl.meta.paper_trading.common import AgentPPO
@@ -150,9 +151,7 @@ def test_latency(self, test_times=10):
         return latency
 
     def run(self):
         # params to filter orders by
-        request_params = GetOrdersRequest(
-            status=QueryOrderStatus.OPEN
-        )
+        request_params = GetOrdersRequest(status=QueryOrderStatus.OPEN)
 
         # orders that satisfy params
         orders = self.alpaca.get_orders(filter=request_params)

From ff360269113480d14ead107f9d42a51a8121332e Mon Sep 17 00:00:00 2001
From: kuds
Date: Wed, 10 Sep 2025 19:29:30 -0500
Subject: [PATCH 16/29] Update typing and replace old Alpaca Python SDK with
 new version

---
 examples/FinRL_PaperTrading_Demo.ipynb | 3702 ++++++++++++------------
 1 file changed, 1820 insertions(+), 1882 deletions(-)

diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb
index c759d784d7..67c15ae05c 100644
--- a/examples/FinRL_PaperTrading_Demo.ipynb
+++ 
b/examples/FinRL_PaperTrading_Demo.ipynb @@ -1,1886 +1,1824 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "V1ofncK2cYhs" - }, - "source": [ - "Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j3mbRu3s1YlD" - }, - "source": [ - "# Part 1: Install FinRL" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0gkmsPgbvNf6" - }, - "outputs": [], - "source": [ - "## install finrl library\n", - "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3rwy7V72-8YY" - }, - "source": [ - "## Get the API Keys Ready" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8Z6qlLXY-fA2" - }, - "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "API_SECRET = \"\"\n", - "API_BASE_URL = 'https://paper-api.alpaca.markets'\n", - "data_url = 'wss://data.alpaca.markets'" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "--6Kx8I21erH" - }, - "source": [ - "## Import related modules" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "H7I7zsyYfoLJ", - "outputId": "1812c13b-410f-434c-eb07-29782ba186e6" - }, - "outputs": [], - "source": [ - "from finrl.config_tickers import DOW_30_TICKER\n", - "from finrl.config import INDICATORS\n", - "from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv\n", - "\n", - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0EVJIQUR6_fu" - }, - "source": [ - "## PPO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-EYx40S84tzo" - }, - "outputs": [], - "source": [ - "import os\n", - "import time\n", - "import gym\n", - "import numpy as np\n", - "import numpy.random as rd\n", - "import torch\n", - "import torch.nn as nn\n", - "from torch import Tensor\n", - "from torch.distributions.normal import Normal\n", - "\n", - "\n", - "class ActorPPO(nn.Module):\n", - " def __init__(self, dims: [int], state_dim: int, action_dim: int):\n", - " super().__init__()\n", - " self.net = build_mlp(dims=[state_dim, *dims, action_dim])\n", - " self.action_std_log = nn.Parameter(torch.zeros((1, action_dim)), requires_grad=True) # trainable parameter\n", - "\n", - " def forward(self, state: Tensor) -> Tensor:\n", - " return self.net(state).tanh() # action.tanh()\n", - "\n", - " def get_action(self, state: Tensor) -> (Tensor, Tensor): # for exploration\n", - " action_avg = self.net(state)\n", - " action_std = self.action_std_log.exp()\n", - "\n", - " dist = Normal(action_avg, action_std)\n", - " action = dist.sample()\n", - " logprob = dist.log_prob(action).sum(1)\n", - " return action, logprob\n", - "\n", - " def get_logprob_entropy(self, state: Tensor, action: Tensor) -> (Tensor, Tensor):\n", - " action_avg = self.net(state)\n", - " action_std = self.action_std_log.exp()\n", - "\n", - " dist = Normal(action_avg, action_std)\n", - " logprob = dist.log_prob(action).sum(1)\n", - " entropy = 
dist.entropy().sum(1)\n", - " return logprob, entropy\n", - "\n", - " @staticmethod\n", - " def convert_action_for_env(action: Tensor) -> Tensor:\n", - " return action.tanh()\n", - "\n", - "\n", - "class CriticPPO(nn.Module):\n", - " def __init__(self, dims: [int], state_dim: int, _action_dim: int):\n", - " super().__init__()\n", - " self.net = build_mlp(dims=[state_dim, *dims, 1])\n", - "\n", - " def forward(self, state: Tensor) -> Tensor:\n", - " return self.net(state) # advantage value\n", - "\n", - "\n", - "def build_mlp(dims: [int]) -> nn.Sequential: # MLP (MultiLayer Perceptron)\n", - " net_list = []\n", - " for i in range(len(dims) - 1):\n", - " net_list.extend([nn.Linear(dims[i], dims[i + 1]), nn.ReLU()])\n", - " del net_list[-1] # remove the activation of output layer\n", - " return nn.Sequential(*net_list)\n", - "\n", - "\n", - "class Config:\n", - " def __init__(self, agent_class=None, env_class=None, env_args=None):\n", - " self.env_class = env_class # env = env_class(**env_args)\n", - " self.env_args = env_args # env = env_class(**env_args)\n", - "\n", - " if env_args is None: # dummy env_args\n", - " env_args = {'env_name': None, 'state_dim': None, 'action_dim': None, 'if_discrete': None}\n", - " self.env_name = env_args['env_name'] # the name of environment. Be used to set 'cwd'.\n", - " self.state_dim = env_args['state_dim'] # vector dimension (feature number) of state\n", - " self.action_dim = env_args['action_dim'] # vector dimension (feature number) of action\n", - " self.if_discrete = env_args['if_discrete'] # discrete or continuous action space\n", - "\n", - " self.agent_class = agent_class # agent = agent_class(...)\n", - "\n", - " '''Arguments for reward shaping'''\n", - " self.gamma = 0.99 # discount factor of future rewards\n", - " self.reward_scale = 1.0 # an approximate target reward usually be closed to 256\n", - "\n", - " '''Arguments for training'''\n", - " self.gpu_id = int(0) # `int` means the ID of single GPU, -1 means CPU\n", - " self.net_dims = (64, 32) # the middle layer dimension of MLP (MultiLayer Perceptron)\n", - " self.learning_rate = 6e-5 # 2 ** -14 ~= 6e-5\n", - " self.soft_update_tau = 5e-3 # 2 ** -8 ~= 5e-3\n", - " self.batch_size = int(128) # num of transitions sampled from replay buffer.\n", - " self.horizon_len = int(2000) # collect horizon_len step while exploring, then update network\n", - " self.buffer_size = None # ReplayBuffer size. Empty the ReplayBuffer for on-policy.\n", - " self.repeat_times = 8.0 # repeatedly update network using ReplayBuffer to keep critic's loss small\n", - "\n", - " '''Arguments for evaluate'''\n", - " self.cwd = None # current working directory to save model. 
None means set automatically\n", - " self.break_step = +np.inf # break training if 'total_step > break_step'\n", - " self.eval_times = int(32) # number of times that get episodic cumulative return\n", - " self.eval_per_step = int(2e4) # evaluate the agent per training steps\n", - "\n", - " def init_before_training(self):\n", - " if self.cwd is None: # set cwd (current working directory) for saving model\n", - " self.cwd = f'./{self.env_name}_{self.agent_class.__name__[5:]}'\n", - " os.makedirs(self.cwd, exist_ok=True)\n", - "\n", - "\n", - "def get_gym_env_args(env, if_print: bool) -> dict:\n", - " if {'unwrapped', 'observation_space', 'action_space', 'spec'}.issubset(dir(env)): # isinstance(env, gym.Env):\n", - " env_name = env.unwrapped.spec.id\n", - " state_shape = env.observation_space.shape\n", - " state_dim = state_shape[0] if len(state_shape) == 1 else state_shape # sometimes state_dim is a list\n", - "\n", - " if_discrete = isinstance(env.action_space, gym.spaces.Discrete)\n", - " if if_discrete: # make sure it is discrete action space\n", - " action_dim = env.action_space.n\n", - " elif isinstance(env.action_space, gym.spaces.Box): # make sure it is continuous action space\n", - " action_dim = env.action_space.shape[0]\n", - "\n", - " env_args = {'env_name': env_name, 'state_dim': state_dim, 'action_dim': action_dim, 'if_discrete': if_discrete}\n", - " print(f\"env_args = {repr(env_args)}\") if if_print else None\n", - " return env_args\n", - "\n", - "\n", - "def kwargs_filter(function, kwargs: dict) -> dict:\n", - " import inspect\n", - " sign = inspect.signature(function).parameters.values()\n", - " sign = {val.name for val in sign}\n", - " common_args = sign.intersection(kwargs.keys())\n", - " return {key: kwargs[key] for key in common_args} # filtered kwargs\n", - "\n", - "\n", - "def build_env(env_class=None, env_args=None):\n", - " if env_class.__module__ == 'gym.envs.registration': # special rule\n", - " env = env_class(id=env_args['env_name'])\n", - " else:\n", - " env = env_class(**kwargs_filter(env_class.__init__, env_args.copy()))\n", - " for attr_str in ('env_name', 'state_dim', 'action_dim', 'if_discrete'):\n", - " setattr(env, attr_str, env_args[attr_str])\n", - " return env\n", - "\n", - "\n", - "class AgentBase:\n", - " def __init__(self, net_dims: [int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n", - " self.state_dim = state_dim\n", - " self.action_dim = action_dim\n", - "\n", - " self.gamma = args.gamma\n", - " self.batch_size = args.batch_size\n", - " self.repeat_times = args.repeat_times\n", - " self.reward_scale = args.reward_scale\n", - " self.soft_update_tau = args.soft_update_tau\n", - "\n", - " self.states = None # assert self.states == (1, state_dim)\n", - " self.device = torch.device(f\"cuda:{gpu_id}\" if (torch.cuda.is_available() and (gpu_id >= 0)) else \"cpu\")\n", - "\n", - " act_class = getattr(self, \"act_class\", None)\n", - " cri_class = getattr(self, \"cri_class\", None)\n", - " self.act = self.act_target = act_class(net_dims, state_dim, action_dim).to(self.device)\n", - " self.cri = self.cri_target = cri_class(net_dims, state_dim, action_dim).to(self.device) \\\n", - " if cri_class else self.act\n", - "\n", - " self.act_optimizer = torch.optim.Adam(self.act.parameters(), args.learning_rate)\n", - " self.cri_optimizer = torch.optim.Adam(self.cri.parameters(), args.learning_rate) \\\n", - " if cri_class else self.act_optimizer\n", - "\n", - " self.criterion = torch.nn.SmoothL1Loss()\n", - "\n", - " 
@staticmethod\n", - " def optimizer_update(optimizer, objective: Tensor):\n", - " optimizer.zero_grad()\n", - " objective.backward()\n", - " optimizer.step()\n", - "\n", - " @staticmethod\n", - " def soft_update(target_net: torch.nn.Module, current_net: torch.nn.Module, tau: float):\n", - " for tar, cur in zip(target_net.parameters(), current_net.parameters()):\n", - " tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))\n", - "\n", - "\n", - "class AgentPPO(AgentBase):\n", - " def __init__(self, net_dims: [int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n", - " self.if_off_policy = False\n", - " self.act_class = getattr(self, \"act_class\", ActorPPO)\n", - " self.cri_class = getattr(self, \"cri_class\", CriticPPO)\n", - " AgentBase.__init__(self, net_dims, state_dim, action_dim, gpu_id, args)\n", - "\n", - " self.ratio_clip = getattr(args, \"ratio_clip\", 0.25) # `ratio.clamp(1 - clip, 1 + clip)`\n", - " self.lambda_gae_adv = getattr(args, \"lambda_gae_adv\", 0.95) # could be 0.80~0.99\n", - " self.lambda_entropy = getattr(args, \"lambda_entropy\", 0.01) # could be 0.00~0.10\n", - " self.lambda_entropy = torch.tensor(self.lambda_entropy, dtype=torch.float32, device=self.device)\n", - "\n", - " def explore_env(self, env, horizon_len: int) -> [Tensor]:\n", - " states = torch.zeros((horizon_len, self.state_dim), dtype=torch.float32).to(self.device)\n", - " actions = torch.zeros((horizon_len, self.action_dim), dtype=torch.float32).to(self.device)\n", - " logprobs = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n", - " rewards = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n", - " dones = torch.zeros(horizon_len, dtype=torch.bool).to(self.device)\n", - "\n", - " ary_state = self.states[0]\n", - "\n", - " get_action = self.act.get_action\n", - " convert = self.act.convert_action_for_env\n", - " for i in range(horizon_len):\n", - " state = torch.as_tensor(ary_state, dtype=torch.float32, device=self.device)\n", - " action, logprob = [t.squeeze(0) for t in get_action(state.unsqueeze(0))[:2]]\n", - "\n", - " ary_action = convert(action).detach().cpu().numpy()\n", - " ary_state, reward, done, _, _ = env.step(ary_action)\n", - " if done:\n", - " ary_state, _ = env.reset()\n", - "\n", - " states[i] = state\n", - " actions[i] = action\n", - " logprobs[i] = logprob\n", - " rewards[i] = reward\n", - " dones[i] = done\n", - "\n", - " self.states[0] = ary_state\n", - " rewards = (rewards * self.reward_scale).unsqueeze(1)\n", - " undones = (1 - dones.type(torch.float32)).unsqueeze(1)\n", - " return states, actions, logprobs, rewards, undones\n", - "\n", - " def update_net(self, buffer) -> [float]:\n", - " with torch.no_grad():\n", - " states, actions, logprobs, rewards, undones = buffer\n", - " buffer_size = states.shape[0]\n", - "\n", - " '''get advantages reward_sums'''\n", - " bs = 2 ** 10 # set a smaller 'batch_size' when out of GPU memory.\n", - " values = [self.cri(states[i:i + bs]) for i in range(0, buffer_size, bs)]\n", - " values = torch.cat(values, dim=0).squeeze(1) # values.shape == (buffer_size, )\n", - "\n", - " advantages = self.get_advantages(rewards, undones, values) # advantages.shape == (buffer_size, )\n", - " reward_sums = advantages + values # reward_sums.shape == (buffer_size, )\n", - " del rewards, undones, values\n", - "\n", - " advantages = (advantages - advantages.mean()) / (advantages.std(dim=0) + 1e-5)\n", - " assert logprobs.shape == advantages.shape == reward_sums.shape == (buffer_size,)\n", - "\n", - " 
'''update network'''\n", - " obj_critics = 0.0\n", - " obj_actors = 0.0\n", - "\n", - " update_times = int(buffer_size * self.repeat_times / self.batch_size)\n", - " assert update_times >= 1\n", - " for _ in range(update_times):\n", - " indices = torch.randint(buffer_size, size=(self.batch_size,), requires_grad=False)\n", - " state = states[indices]\n", - " action = actions[indices]\n", - " logprob = logprobs[indices]\n", - " advantage = advantages[indices]\n", - " reward_sum = reward_sums[indices]\n", - "\n", - " value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state\n", - " obj_critic = self.criterion(value, reward_sum)\n", - " self.optimizer_update(self.cri_optimizer, obj_critic)\n", - "\n", - " new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)\n", - " ratio = (new_logprob - logprob.detach()).exp()\n", - " surrogate1 = advantage * ratio\n", - " surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)\n", - " obj_surrogate = torch.min(surrogate1, surrogate2).mean()\n", - "\n", - " obj_actor = obj_surrogate + obj_entropy.mean() * self.lambda_entropy\n", - " self.optimizer_update(self.act_optimizer, -obj_actor)\n", - "\n", - " obj_critics += obj_critic.item()\n", - " obj_actors += obj_actor.item()\n", - " a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1)).mean()\n", - " return obj_critics / update_times, obj_actors / update_times, a_std_log.item()\n", - "\n", - " def get_advantages(self, rewards: Tensor, undones: Tensor, values: Tensor) -> Tensor:\n", - " advantages = torch.empty_like(values) # advantage value\n", - "\n", - " masks = undones * self.gamma\n", - " horizon_len = rewards.shape[0]\n", - "\n", - " next_state = torch.tensor(self.states, dtype=torch.float32).to(self.device)\n", - " next_value = self.cri(next_state).detach()[0, 0]\n", - "\n", - " advantage = 0 # last_gae_lambda\n", - " for t in range(horizon_len - 1, -1, -1):\n", - " delta = rewards[t] + masks[t] * next_value - values[t]\n", - " advantages[t] = advantage = delta + masks[t] * self.lambda_gae_adv * advantage\n", - " next_value = values[t]\n", - " return advantages\n", - "\n", - "\n", - "class PendulumEnv(gym.Wrapper): # a demo of custom gym env\n", - " def __init__(self):\n", - " gym.logger.set_level(40) # Block warning\n", - " gym_env_name = \"Pendulum-v0\" if gym.__version__ < '0.18.0' else \"Pendulum-v1\"\n", - " super().__init__(env=gym.make(gym_env_name))\n", - "\n", - " '''the necessary env information when you design a custom env'''\n", - " self.env_name = gym_env_name # the name of this env.\n", - " self.state_dim = self.observation_space.shape[0] # feature number of state\n", - " self.action_dim = self.action_space.shape[0] # feature number of action\n", - " self.if_discrete = False # discrete action or continuous action\n", - "\n", - " def reset(self) -> np.ndarray: # reset the agent in env\n", - " resetted_env, _ = self.env.reset()\n", - " return resetted_env\n", - "\n", - " def step(self, action: np.ndarray) -> (np.ndarray, float, bool, dict): # agent interacts in env\n", - " # We suggest that adjust action space to (-1, +1) when designing a custom env.\n", - " state, reward, done, info_dict, _ = self.env.step(action * 2)\n", - " return state.reshape(self.state_dim), float(reward), done, info_dict\n", - "\n", - " \n", - "def train_agent(args: Config):\n", - " args.init_before_training()\n", - "\n", - " env = build_env(args.env_class, args.env_args)\n", - " agent = args.agent_class(args.net_dims, args.state_dim, 
args.action_dim, gpu_id=args.gpu_id, args=args)\n", - "\n", - " new_env, _ = env.reset()\n", - " agent.states = new_env[np.newaxis, :]\n", - "\n", - " evaluator = Evaluator(eval_env=build_env(args.env_class, args.env_args),\n", - " eval_per_step=args.eval_per_step,\n", - " eval_times=args.eval_times,\n", - " cwd=args.cwd)\n", - " torch.set_grad_enabled(False)\n", - " while True: # start training\n", - " buffer_items = agent.explore_env(env, args.horizon_len)\n", - "\n", - " torch.set_grad_enabled(True)\n", - " logging_tuple = agent.update_net(buffer_items)\n", - " torch.set_grad_enabled(False)\n", - "\n", - " evaluator.evaluate_and_save(agent.act, args.horizon_len, logging_tuple)\n", - " if (evaluator.total_step > args.break_step) or os.path.exists(f\"{args.cwd}/stop\"):\n", - " torch.save(agent.act.state_dict(), args.cwd + '/actor.pth')\n", - " break # stop training when reach `break_step` or `mkdir cwd/stop`\n", - "\n", - "\n", - "def render_agent(env_class, env_args: dict, net_dims: [int], agent_class, actor_path: str, render_times: int = 8):\n", - " env = build_env(env_class, env_args)\n", - "\n", - " state_dim = env_args['state_dim']\n", - " action_dim = env_args['action_dim']\n", - " agent = agent_class(net_dims, state_dim, action_dim, gpu_id=-1)\n", - " actor = agent.act\n", - "\n", - " print(f\"| render and load actor from: {actor_path}\")\n", - " actor.load_state_dict(torch.load(actor_path, map_location=lambda storage, loc: storage))\n", - " for i in range(render_times):\n", - " cumulative_reward, episode_step = get_rewards_and_steps(env, actor, if_render=True)\n", - " print(f\"|{i:4} cumulative_reward {cumulative_reward:9.3f} episode_step {episode_step:5.0f}\")\n", - "\n", - " \n", - "class Evaluator:\n", - " def __init__(self, eval_env, eval_per_step: int = 1e4, eval_times: int = 8, cwd: str = '.'):\n", - " self.cwd = cwd\n", - " self.env_eval = eval_env\n", - " self.eval_step = 0\n", - " self.total_step = 0\n", - " self.start_time = time.time()\n", - " self.eval_times = eval_times # number of times that get episodic cumulative return\n", - " self.eval_per_step = eval_per_step # evaluate the agent per training steps\n", - "\n", - " self.recorder = []\n", - " print(f\"\\n| `step`: Number of samples, or total training steps, or running times of `env.step()`.\"\n", - " f\"\\n| `time`: Time spent from the start of training to this moment.\"\n", - " f\"\\n| `avgR`: Average value of cumulative rewards, which is the sum of rewards in an episode.\"\n", - " f\"\\n| `stdR`: Standard dev of cumulative rewards, which is the sum of rewards in an episode.\"\n", - " f\"\\n| `avgS`: Average of steps in an episode.\"\n", - " f\"\\n| `objC`: Objective of Critic network. Or call it loss function of critic network.\"\n", - " f\"\\n| `objA`: Objective of Actor network. 
It is the average Q value of the critic network.\"\n", - " f\"\\n| {'step':>8} {'time':>8} | {'avgR':>8} {'stdR':>6} {'avgS':>6} | {'objC':>8} {'objA':>8}\")\n", - " \n", - " def evaluate_and_save(self, actor, horizon_len: int, logging_tuple: tuple):\n", - " self.total_step += horizon_len\n", - " if self.eval_step + self.eval_per_step > self.total_step:\n", - " return\n", - " self.eval_step = self.total_step\n", - "\n", - " rewards_steps_ary = [get_rewards_and_steps(self.env_eval, actor) for _ in range(self.eval_times)]\n", - " rewards_steps_ary = np.array(rewards_steps_ary, dtype=np.float32)\n", - " avg_r = rewards_steps_ary[:, 0].mean() # average of cumulative rewards\n", - " std_r = rewards_steps_ary[:, 0].std() # std of cumulative rewards\n", - " avg_s = rewards_steps_ary[:, 1].mean() # average of steps in an episode\n", - "\n", - " used_time = time.time() - self.start_time\n", - " self.recorder.append((self.total_step, used_time, avg_r))\n", - " \n", - " print(f\"| {self.total_step:8.2e} {used_time:8.0f} \"\n", - " f\"| {avg_r:8.2f} {std_r:6.2f} {avg_s:6.0f} \"\n", - " f\"| {logging_tuple[0]:8.2f} {logging_tuple[1]:8.2f}\")\n", - "\n", - "\n", - "def get_rewards_and_steps(env, actor, if_render: bool = False) -> (float, int): # cumulative_rewards and episode_steps\n", - " device = next(actor.parameters()).device # net.parameters() is a Python generator.\n", - "\n", - " state, _ = env.reset()\n", - " episode_steps = 0\n", - " cumulative_returns = 0.0 # sum of rewards in an episode\n", - " for episode_steps in range(12345):\n", - " tensor_state = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)\n", - " tensor_action = actor(tensor_state)\n", - " action = tensor_action.detach().cpu().numpy()[0] # not need detach(), because using torch.no_grad() outside\n", - " state, reward, done, _, _ = env.step(action)\n", - " cumulative_returns += reward\n", - "\n", - " if if_render:\n", - " env.render()\n", - " if done:\n", - " break\n", - " return cumulative_returns, episode_steps + 1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9tzAw9k26nAC" - }, - "source": [ - "## DRL Agent Class" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pwCbbocm6PHM" - }, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "import torch\n", - "# from elegantrl.agents import AgentA2C\n", - "\n", - "MODELS = {\"ppo\": AgentPPO}\n", - "OFF_POLICY_MODELS = [\"ddpg\", \"td3\", \"sac\"]\n", - "ON_POLICY_MODELS = [\"ppo\"]\n", - "# MODEL_KWARGS = {x: config.__dict__[f\"{x.upper()}_PARAMS\"] for x in MODELS.keys()}\n", - "#\n", - "# NOISE = {\n", - "# \"normal\": NormalActionNoise,\n", - "# \"ornstein_uhlenbeck\": OrnsteinUhlenbeckActionNoise,\n", - "# }\n", - "\n", - "\n", - "class DRLAgent:\n", - " \"\"\"Implementations of DRL algorithms\n", - " Attributes\n", - " ----------\n", - " env: gym environment class\n", - " user-defined class\n", - " Methods\n", - " -------\n", - " get_model()\n", - " setup DRL algorithms\n", - " train_model()\n", - " train DRL algorithms in a train dataset\n", - " and output the trained model\n", - " DRL_prediction()\n", - " make a prediction in a test dataset and get results\n", - " \"\"\"\n", - "\n", - " def __init__(self, env, price_array, tech_array, turbulence_array):\n", - " self.env = env\n", - " self.price_array = price_array\n", - " self.tech_array = tech_array\n", - " self.turbulence_array = turbulence_array\n", - "\n", - " def get_model(self, model_name, model_kwargs):\n", - " 
env_config = {\n", - " \"price_array\": self.price_array,\n", - " \"tech_array\": self.tech_array,\n", - " \"turbulence_array\": self.turbulence_array,\n", - " \"if_train\": True,\n", - " }\n", - " environment = self.env(config=env_config)\n", - " env_args = {'config': env_config,\n", - " 'env_name': environment.env_name,\n", - " 'state_dim': environment.state_dim,\n", - " 'action_dim': environment.action_dim,\n", - " 'if_discrete': False}\n", - " agent = MODELS[model_name]\n", - " if model_name not in MODELS:\n", - " raise NotImplementedError(\"NotImplementedError\")\n", - " model = Config(agent_class=agent, env_class=self.env, env_args=env_args)\n", - " model.if_off_policy = model_name in OFF_POLICY_MODELS\n", - " if model_kwargs is not None:\n", - " try:\n", - " model.learning_rate = model_kwargs[\"learning_rate\"]\n", - " model.batch_size = model_kwargs[\"batch_size\"]\n", - " model.gamma = model_kwargs[\"gamma\"]\n", - " model.seed = model_kwargs[\"seed\"]\n", - " model.net_dims = model_kwargs[\"net_dimension\"]\n", - " model.target_step = model_kwargs[\"target_step\"]\n", - " model.eval_gap = model_kwargs[\"eval_gap\"]\n", - " model.eval_times = model_kwargs[\"eval_times\"]\n", - " except BaseException:\n", - " raise ValueError(\n", - " \"Fail to read arguments, please check 'model_kwargs' input.\"\n", - " )\n", - " return model\n", - "\n", - " def train_model(self, model, cwd, total_timesteps=5000):\n", - " model.cwd = cwd\n", - " model.break_step = total_timesteps\n", - " train_agent(model)\n", - "\n", - " @staticmethod\n", - " def DRL_prediction(model_name, cwd, net_dimension, environment):\n", - " if model_name not in MODELS:\n", - " raise NotImplementedError(\"NotImplementedError\")\n", - " agent_class = MODELS[model_name]\n", - " environment.env_num = 1\n", - " agent = agent_class(net_dimension, environment.state_dim, environment.action_dim)\n", - " actor = agent.act\n", - " # load agent\n", - " try: \n", - " cwd = cwd + '/actor.pth'\n", - " print(f\"| load actor from: {cwd}\")\n", - " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n", - " act = actor\n", - " device = agent.device\n", - " except BaseException:\n", - " raise ValueError(\"Fail to load agent!\")\n", - "\n", - " # test on the testing env\n", - " _torch = torch\n", - " state, _ = environment.reset()\n", - " episode_returns = [] # the cumulative_return / initial_account\n", - " episode_total_assets = [environment.initial_total_asset]\n", - " with _torch.no_grad():\n", - " for i in range(environment.max_step):\n", - " s_tensor = _torch.as_tensor((state,), device=device)\n", - " a_tensor = act(s_tensor) # action_tanh = act.forward()\n", - " action = (\n", - " a_tensor.detach().cpu().numpy()[0]\n", - " ) # not need detach(), because with torch.no_grad() outside\n", - " state, reward, done, _, _ = environment.step(action)\n", - "\n", - " total_asset = (\n", - " environment.amount\n", - " + (\n", - " environment.price_ary[environment.day] * environment.stocks\n", - " ).sum()\n", - " )\n", - " episode_total_assets.append(total_asset)\n", - " episode_return = total_asset / environment.initial_total_asset\n", - " episode_returns.append(episode_return)\n", - " if done:\n", - " break\n", - " print(\"Test Finished!\")\n", - " # return episode total_assets on testing data\n", - " print(\"episode_return\", episode_return)\n", - " return episode_total_assets\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zjLda8No6pvI" - }, - "source": [ - "## Train & Test Functions" - ] - }, 
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j8-e03ev32oz" - }, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "from finrl.meta.data_processor import DataProcessor\n", - "\n", - "def train(\n", - " start_date,\n", - " end_date,\n", - " ticker_list,\n", - " data_source,\n", - " time_interval,\n", - " technical_indicator_list,\n", - " drl_lib,\n", - " env,\n", - " model_name,\n", - " if_vix=True,\n", - " **kwargs,\n", - "):\n", - " # download data\n", - " dp = DataProcessor(data_source, **kwargs)\n", - " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n", - " data = dp.clean_data(data)\n", - " data = dp.add_technical_indicator(data, technical_indicator_list)\n", - " if if_vix:\n", - " data = dp.add_vix(data)\n", - " else:\n", - " data = dp.add_turbulence(data)\n", - " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n", - " env_config = {\n", - " \"price_array\": price_array,\n", - " \"tech_array\": tech_array,\n", - " \"turbulence_array\": turbulence_array,\n", - " \"if_train\": True,\n", - " }\n", - " env_instance = env(config=env_config)\n", - "\n", - " # read parameters\n", - " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n", - "\n", - " if drl_lib == \"elegantrl\":\n", - " DRLAgent_erl = DRLAgent\n", - " break_step = kwargs.get(\"break_step\", 1e6)\n", - " erl_params = kwargs.get(\"erl_params\")\n", - " agent = DRLAgent_erl(\n", - " env=env,\n", - " price_array=price_array,\n", - " tech_array=tech_array,\n", - " turbulence_array=turbulence_array,\n", - " )\n", - " model = agent.get_model(model_name, model_kwargs=erl_params)\n", - " trained_model = agent.train_model(\n", - " model=model, cwd=cwd, total_timesteps=break_step\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Evsg8QtEDHDO" - }, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "from finrl.config import INDICATORS\n", - "from finrl.config import RLlib_PARAMS\n", - "from finrl.config import TEST_END_DATE\n", - "from finrl.config import TEST_START_DATE\n", - "from finrl.config_tickers import DOW_30_TICKER\n", - "\n", - "def test(\n", - " start_date,\n", - " end_date,\n", - " ticker_list,\n", - " data_source,\n", - " time_interval,\n", - " technical_indicator_list,\n", - " drl_lib,\n", - " env,\n", - " model_name,\n", - " if_vix=True,\n", - " **kwargs,\n", - "):\n", - "\n", - " # import data processor\n", - " from finrl.meta.data_processor import DataProcessor\n", - "\n", - " # fetch data\n", - " dp = DataProcessor(data_source, **kwargs)\n", - " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n", - " data = dp.clean_data(data)\n", - " data = dp.add_technical_indicator(data, technical_indicator_list)\n", - "\n", - " if if_vix:\n", - " data = dp.add_vix(data)\n", - " else:\n", - " data = dp.add_turbulence(data)\n", - " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n", - "\n", - " env_config = {\n", - " \"price_array\": price_array,\n", - " \"tech_array\": tech_array,\n", - " \"turbulence_array\": turbulence_array,\n", - " \"if_train\": False,\n", - " }\n", - " env_instance = env(config=env_config)\n", - "\n", - " # load elegantrl needs state dim, action dim and net dim\n", - " net_dimension = kwargs.get(\"net_dimension\", 2**7)\n", - " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n", - " print(\"price_array: \", len(price_array))\n", - "\n", - " if drl_lib == 
\"elegantrl\":\n", - " DRLAgent_erl = DRLAgent\n", - " episode_total_assets = DRLAgent_erl.DRL_prediction(\n", - " model_name=model_name,\n", - " cwd=cwd,\n", - " net_dimension=net_dimension,\n", - " environment=env_instance,\n", - " )\n", - " return episode_total_assets" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pf5aVHAU-xF6" - }, - "source": [ - "## Import Dow Jones 30 Symbols" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jx25TA_X87F-" - }, - "outputs": [], - "source": [ - "ticker_list = DOW_30_TICKER\n", - "action_dim = len(DOW_30_TICKER)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UIV0kO_y-inG", - "outputId": "bd7b3c21-641e-4eb7-a4af-ae7d156042a6" - }, - "outputs": [], - "source": [ - "print(ticker_list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "CnqQ-cC5-rfO", - "outputId": "29b248c9-ec98-44cd-befb-65192af72ea4" - }, - "outputs": [], - "source": [ - "print(INDICATORS)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rZMkcyjZ-25l" - }, - "source": [ - "## Calculate the DRL state dimension manually for paper trading" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GLfkTsXK-e90" - }, - "outputs": [], - "source": [ - "# amount + (turbulence, turbulence_bool) + (price, shares, cd (holding time)) * stock_dim + tech_dim\n", - "state_dim = 1 + 2 + 3 * action_dim + len(INDICATORS) * action_dim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QqUkvImG-n66", - "outputId": "9cb4a3d8-5064-4971-d095-65d3ab12f11a" - }, - "outputs": [], - "source": [ - "state_dim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8Z6qlLXY-fA2" - }, - "outputs": [], - "source": [ - "env = StockTradingEnv" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J25MuZLiGqCP" - }, - "source": [ - "## Show the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "puJZWm8NHtSN" - }, - "source": [ - "### Step 1. Pick a data source" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "3ZCru8f7GqgL", - "outputId": "010e6a83-1280-410a-e240-4bc8ec124774" - }, - "outputs": [], - "source": [ - "#DP = DataProcessor(data_source = 'alpaca',\n", - "# API_KEY = API_KEY, \n", - "# API_SECRET = API_SECRET, \n", - "# API_BASE_URL = API_BASE_URL\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nvPEW2mYHvkR" - }, - "source": [ - "### Step 2. 
Get ticker list, Set start date and end date, specify the data frequency" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NPNxj6c8HIiE" - }, - "outputs": [], - "source": [ - "#data = DP.download_data(start_date = '2021-10-04', \n", - "# end_date = '2021-10-08',\n", - "# ticker_list = ticker_list, \n", - "# time_interval= '1Min')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pPcazCq1d5ec", - "outputId": "39d61284-7b51-46c2-cc2d-424f0f569e25" - }, - "outputs": [], - "source": [ - "#data['timestamp'].nunique()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i46jGdE0IAel" - }, - "source": [ - "### Step 3. Data Cleaning & Feature Engineering" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "x9euUsEPHWFK", - "outputId": "2ae7fae7-d9ae-4f34-f32a-13e1476debea" - }, - "outputs": [], - "source": [ - "#data = DP.clean_data(data)\n", - "#data = DP.add_technical_indicator(data, INDICATORS)\n", - "#data = DP.add_vix(data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GOcPTaAgHdxa", - "outputId": "4da334de-fbf6-49ca-ed22-bf1a99469457" - }, - "outputs": [], - "source": [ - "#data.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bbu03L_UIMWt" - }, - "source": [ - "### Step 4. Transform to numpy array" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Rzj0vjZZHdGM", - "outputId": "d0ec43a2-b78e-4c09-c048-b88e7eba6c81" - }, - "outputs": [], - "source": [ - "#price_array, tech_array, turbulence_array = DP.df_to_array(data, if_vix=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eW0UDAXI1nEa" - }, - "source": [ - "# Part 2: Train the agent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lArLOFcJ7VMO" - }, - "source": [ - "## Train" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "g1F84mebj4gu" - }, - "outputs": [], - "source": [ - "ERL_PARAMS = {\"learning_rate\": 3e-6,\"batch_size\": 2048,\"gamma\": 0.985,\n", - " \"seed\":312,\"net_dimension\":[128,64], \"target_step\":5000, \"eval_gap\":30,\n", - " \"eval_times\":1} \n", - "env = StockTradingEnv\n", - "#if you want to use larger datasets (change to longer period), and it raises error, \n", - "#please try to increase \"target_step\". It should be larger than the episode steps. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BxcNI2fdNjip", - "outputId": "8db09736-a3a1-48a2-9e61-f9d8828ee327" - }, - "outputs": [], - "source": [ - "train(start_date = '2022-08-25', \n", - " end_date = '2022-08-31',\n", - " ticker_list = ticker_list, \n", - " data_source = 'alpaca',\n", - " time_interval= '1Min', \n", - " technical_indicator_list= INDICATORS,\n", - " drl_lib='elegantrl', \n", - " env=env,\n", - " model_name='ppo',\n", - " if_vix=True, \n", - " API_KEY = API_KEY, \n", - " API_SECRET = API_SECRET, \n", - " API_BASE_URL = API_BASE_URL,\n", - " erl_params=ERL_PARAMS,\n", - " cwd='./papertrading_erl', #current_working_dir\n", - " break_step=1e5)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "g37WugV_1pAS" - }, - "source": [ - "## Test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SxYoWCDa02TW" - }, - "outputs": [], - "source": [ - "account_value_erl=test(start_date = '2022-09-01', \n", - " end_date = '2022-09-02',\n", - " ticker_list = ticker_list, \n", - " data_source = 'alpaca',\n", - " time_interval= '1Min', \n", - " technical_indicator_list= INDICATORS,\n", - " drl_lib='elegantrl', \n", - " env=env, \n", - " model_name='ppo',\n", - " if_vix=True, \n", - " API_KEY = API_KEY, \n", - " API_SECRET = API_SECRET, \n", - " API_BASE_URL = API_BASE_URL,\n", - " cwd='./papertrading_erl',\n", - " net_dimension = ERL_PARAMS['net_dimension'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e8aNQ58X7avM" - }, - "source": [ - "## Use full data to train " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3CQ9_Yv41r88" - }, - "source": [ - "After tuning well, retrain on the training and testing sets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cUSgbwt_10V3", - "outputId": "50f3d8c6-b333-480e-b2fb-25e566797806" - }, - "outputs": [], - "source": [ - "train(start_date = '2022-08-25', \n", - " end_date = '2022-09-02',\n", - " ticker_list = ticker_list, \n", - " data_source = 'alpaca',\n", - " time_interval= '1Min', \n", - " technical_indicator_list= INDICATORS,\n", - " drl_lib='elegantrl', \n", - " env=env, \n", - " model_name='ppo',\n", - " if_vix=True, \n", - " API_KEY = API_KEY, \n", - " API_SECRET = API_SECRET, \n", - " API_BASE_URL = API_BASE_URL,\n", - " erl_params=ERL_PARAMS,\n", - " cwd='./papertrading_erl_retrain',\n", - " break_step=2e5)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sIQN6Ggt7gXY" - }, - "source": [ - "# Part 3: Deploy the agent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UFoxkigg1zXa" - }, - "source": [ - "## Setup Alpaca Paper trading environment" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "LpkoZpYzfneS" - }, - "outputs": [], - "source": [ - "import datetime\n", - "import threading\n", - "from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor\n", - "import alpaca_trade_api as tradeapi\n", - "import time\n", - "import pandas as pd\n", - "import numpy as np\n", - "import torch\n", - "import gym\n", - "\n", - "class AlpacaPaperTrading():\n", - "\n", - " def __init__(self,ticker_list, time_interval, drl_lib, agent, cwd, net_dim, \n", - " state_dim, action_dim, API_KEY, API_SECRET, \n", - " API_BASE_URL, tech_indicator_list, turbulence_thresh=30, \n", - " max_stock=1e2, 
latency = None):\n", - " #load agent\n", - " self.drl_lib = drl_lib\n", - " if agent =='ppo':\n", - " if drl_lib == 'elegantrl': \n", - " agent_class = AgentPPO\n", - " agent = agent_class(net_dim, state_dim, action_dim)\n", - " actor = agent.act\n", - " # load agent\n", - " try: \n", - " cwd = cwd + '/actor.pth'\n", - " print(f\"| load actor from: {cwd}\")\n", - " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n", - " self.act = actor\n", - " self.device = agent.device\n", - " except BaseException:\n", - " raise ValueError(\"Fail to load agent!\")\n", - " \n", - " elif drl_lib == 'rllib':\n", - " from ray.rllib.agents import ppo\n", - " from ray.rllib.agents.ppo.ppo import PPOTrainer\n", - " \n", - " config = ppo.DEFAULT_CONFIG.copy()\n", - " config['env'] = StockEnvEmpty\n", - " config[\"log_level\"] = \"WARN\"\n", - " config['env_config'] = {'state_dim':state_dim,\n", - " 'action_dim':action_dim,}\n", - " trainer = PPOTrainer(env=StockEnvEmpty, config=config)\n", - " trainer.restore(cwd)\n", - " try:\n", - " trainer.restore(cwd)\n", - " self.agent = trainer\n", - " print(\"Restoring from checkpoint path\", cwd)\n", - " except:\n", - " raise ValueError('Fail to load agent!')\n", - " \n", - " elif drl_lib == 'stable_baselines3':\n", - " from stable_baselines3 import PPO\n", - " \n", - " try:\n", - " #load agent\n", - " self.model = PPO.load(cwd)\n", - " print(\"Successfully load model\", cwd)\n", - " except:\n", - " raise ValueError('Fail to load agent!')\n", - " \n", - " else:\n", - " raise ValueError('The DRL library input is NOT supported yet. Please check your input.')\n", - " \n", - " else:\n", - " raise ValueError('Agent input is NOT supported yet.')\n", - " \n", - " \n", - " \n", - " #connect to Alpaca trading API\n", - " try:\n", - " self.alpaca = tradeapi.REST(API_KEY,API_SECRET,API_BASE_URL, 'v2')\n", - " except:\n", - " raise ValueError('Fail to connect Alpaca. 
Please check account info and internet connection.')\n", - " \n", - " #read trading time interval\n", - " if time_interval == '1s':\n", - " self.time_interval = 1\n", - " elif time_interval == '5s':\n", - " self.time_interval = 5\n", - " elif time_interval == '1Min':\n", - " self.time_interval = 60\n", - " elif time_interval == '5Min':\n", - " self.time_interval = 60 * 5\n", - " elif time_interval == '15Min':\n", - " self.time_interval = 60 * 15\n", - " else:\n", - " raise ValueError('Time interval input is NOT supported yet.')\n", - " \n", - " #read trading settings\n", - " self.tech_indicator_list = tech_indicator_list\n", - " self.turbulence_thresh = turbulence_thresh\n", - " self.max_stock = max_stock \n", - " \n", - " #initialize account\n", - " self.stocks = np.asarray([0] * len(ticker_list)) #stocks holding\n", - " self.stocks_cd = np.zeros_like(self.stocks) \n", - " self.cash = None #cash record \n", - " self.stocks_df = pd.DataFrame(self.stocks, columns=['stocks'], index = ticker_list)\n", - " self.asset_list = []\n", - " self.price = np.asarray([0] * len(ticker_list))\n", - " self.stockUniverse = ticker_list\n", - " self.turbulence_bool = 0\n", - " self.equities = []\n", - " \n", - " def test_latency(self, test_times = 10): \n", - " total_time = 0\n", - " for i in range(0, test_times):\n", - " time0 = time.time()\n", - " self.get_state()\n", - " time1 = time.time()\n", - " temp_time = time1 - time0\n", - " total_time += temp_time\n", - " latency = total_time/test_times\n", - " print('latency for data processing: ', latency)\n", - " return latency\n", - " \n", - " def run(self):\n", - " orders = self.alpaca.list_orders(status=\"open\")\n", - " for order in orders:\n", - " self.alpaca.cancel_order(order.id)\n", - " \n", - " # Wait for market to open.\n", - " print(\"Waiting for market to open...\")\n", - " self.awaitMarketOpen()\n", - " print(\"Market opened.\")\n", - "\n", - " while True:\n", - "\n", - " # Figure out when the market will close so we can prepare to sell beforehand.\n", - " clock = self.alpaca.get_clock()\n", - " closingTime = clock.next_close.replace(tzinfo=datetime.timezone.utc).timestamp()\n", - " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n", - " self.timeToClose = closingTime - currTime\n", - " \n", - " if(self.timeToClose < (60)):\n", - " # Close all positions when 1 minutes til market close.\n", - " print(\"Market closing soon. Stop trading.\")\n", - " break\n", - " \n", - " '''# Close all positions when 1 minutes til market close.\n", - " print(\"Market closing soon. 
Closing positions.\")\n", - "\n", - " threads = []\n", - " positions = self.alpaca.list_positions()\n", - " for position in positions:\n", - " if(position.side == 'long'):\n", - " orderSide = 'sell'\n", - " else:\n", - " orderSide = 'buy'\n", - " qty = abs(int(float(position.qty)))\n", - " respSO = []\n", - " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n", - " tSubmitOrder.start()\n", - " threads.append(tSubmitOrder) # record thread for joining later\n", - "\n", - " for x in threads: # wait for all threads to complete\n", - " x.join() \n", - " # Run script again after market close for next trading day.\n", - " print(\"Sleeping until market close (15 minutes).\")\n", - " time.sleep(60 * 15)'''\n", - " \n", - " else:\n", - " self.trade()\n", - " last_equity = float(self.alpaca.get_account().last_equity)\n", - " cur_time = time.time()\n", - " self.equities.append([cur_time,last_equity])\n", - " time.sleep(self.time_interval)\n", - " \n", - " def awaitMarketOpen(self):\n", - " isOpen = self.alpaca.get_clock().is_open\n", - " while(not isOpen):\n", - " clock = self.alpaca.get_clock()\n", - " openingTime = clock.next_open.replace(tzinfo=datetime.timezone.utc).timestamp()\n", - " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n", - " timeToOpen = int((openingTime - currTime) / 60)\n", - " print(str(timeToOpen) + \" minutes til market open.\")\n", - " time.sleep(60)\n", - " isOpen = self.alpaca.get_clock().is_open\n", - " \n", - " def trade(self):\n", - " state = self.get_state()\n", - " \n", - " if self.drl_lib == 'elegantrl':\n", - " with torch.no_grad():\n", - " s_tensor = torch.as_tensor((state,), device=self.device)\n", - " a_tensor = self.act(s_tensor) \n", - " action = a_tensor.detach().cpu().numpy()[0] \n", - " action = (action * self.max_stock).astype(int)\n", - " \n", - " elif self.drl_lib == 'rllib':\n", - " action = self.agent.compute_single_action(state)\n", - " \n", - " elif self.drl_lib == 'stable_baselines3':\n", - " action = self.model.predict(state)[0]\n", - " \n", - " else:\n", - " raise ValueError('The DRL library input is NOT supported yet. 
Please check your input.')\n", - " \n", - " self.stocks_cd += 1\n", - " if self.turbulence_bool == 0:\n", - " min_action = 10 # stock_cd\n", - " threads = []\n", - " for index in np.where(action < -min_action)[0]: # sell_index:\n", - " sell_num_shares = min(self.stocks[index], -action[index])\n", - " qty = abs(int(sell_num_shares))\n", - " respSO = []\n", - " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'sell', respSO))\n", - " tSubmitOrder.start()\n", - " threads.append(tSubmitOrder) # record thread for joining later\n", - " self.cash = float(self.alpaca.get_account().cash)\n", - " self.stocks_cd[index] = 0\n", - " \n", - " for x in threads: # wait for all threads to complete\n", - " x.join() \n", - "\n", - " threads = []\n", - " for index in np.where(action > min_action)[0]: # buy_index:\n", - " if self.cash < 0:\n", - " tmp_cash = 0\n", - " else:\n", - " tmp_cash = self.cash\n", - " buy_num_shares = min(tmp_cash // self.price[index], abs(int(action[index])))\n", - " if (buy_num_shares != buy_num_shares): # if buy_num_change = nan\n", - " qty = 0 # set to 0 quantity\n", - " else:\n", - " qty = abs(int(buy_num_shares))\n", - " qty = abs(int(buy_num_shares))\n", - " respSO = []\n", - " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'buy', respSO))\n", - " tSubmitOrder.start()\n", - " threads.append(tSubmitOrder) # record thread for joining later\n", - " self.cash = float(self.alpaca.get_account().cash)\n", - " self.stocks_cd[index] = 0\n", - "\n", - " for x in threads: # wait for all threads to complete\n", - " x.join() \n", - " \n", - " else: # sell all when turbulence\n", - " threads = []\n", - " positions = self.alpaca.list_positions()\n", - " for position in positions:\n", - " if(position.side == 'long'):\n", - " orderSide = 'sell'\n", - " else:\n", - " orderSide = 'buy'\n", - " qty = abs(int(float(position.qty)))\n", - " respSO = []\n", - " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n", - " tSubmitOrder.start()\n", - " threads.append(tSubmitOrder) # record thread for joining later\n", - "\n", - " for x in threads: # wait for all threads to complete\n", - " x.join() \n", - " \n", - " self.stocks_cd[:] = 0\n", - " \n", - " \n", - " def get_state(self):\n", - " alpaca = AlpacaProcessor(api=self.alpaca)\n", - " price, tech, turbulence = alpaca.fetch_latest_data(ticker_list = self.stockUniverse, time_interval='1Min',\n", - " tech_indicator_list=self.tech_indicator_list)\n", - " turbulence_bool = 1 if turbulence >= self.turbulence_thresh else 0\n", - " \n", - " turbulence = (self.sigmoid_sign(turbulence, self.turbulence_thresh) * 2 ** -5).astype(np.float32)\n", - " \n", - " tech = tech * 2 ** -7\n", - " positions = self.alpaca.list_positions()\n", - " stocks = [0] * len(self.stockUniverse)\n", - " for position in positions:\n", - " ind = self.stockUniverse.index(position.symbol)\n", - " stocks[ind] = ( abs(int(float(position.qty))))\n", - " \n", - " stocks = np.asarray(stocks, dtype = float)\n", - " cash = float(self.alpaca.get_account().cash)\n", - " self.cash = cash\n", - " self.stocks = stocks\n", - " self.turbulence_bool = turbulence_bool \n", - " self.price = price\n", - " \n", - " \n", - " \n", - " amount = np.array(self.cash * (2 ** -12), dtype=np.float32)\n", - " scale = np.array(2 ** -6, dtype=np.float32)\n", - " state = np.hstack((amount,\n", - " turbulence,\n", - " self.turbulence_bool,\n", - " price * scale,\n", - " self.stocks * scale,\n", - 
" self.stocks_cd,\n", - " tech,\n", - " )).astype(np.float32)\n", - " state[np.isnan(state)] = 0.0\n", - " state[np.isinf(state)] = 0.0\n", - " print(len(self.stockUniverse))\n", - " return state\n", - " \n", - " def submitOrder(self, qty, stock, side, resp):\n", - " if(qty > 0):\n", - " try:\n", - " self.alpaca.submit_order(stock, qty, side, \"market\", \"day\")\n", - " print(\"Market order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | completed.\")\n", - " resp.append(True)\n", - " except:\n", - " print(\"Order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | did not go through.\")\n", - " resp.append(False)\n", - " else:\n", - " print(\"Quantity is 0, order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | not completed.\")\n", - " resp.append(True)\n", - "\n", - " @staticmethod\n", - " def sigmoid_sign(ary, thresh):\n", - " def sigmoid(x):\n", - " return 1 / (1 + np.exp(-x * np.e)) - 0.5\n", - "\n", - " return sigmoid(ary / thresh) * thresh\n", - " \n", - "class StockEnvEmpty(gym.Env):\n", - " #Empty Env used for loading rllib agent\n", - " def __init__(self,config):\n", - " state_dim = config['state_dim']\n", - " action_dim = config['action_dim']\n", - " self.env_num = 1\n", - " self.max_step = 10000\n", - " self.env_name = 'StockEnvEmpty'\n", - " self.state_dim = state_dim \n", - " self.action_dim = action_dim\n", - " self.if_discrete = False \n", - " self.target_return = 9999\n", - " self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(state_dim,), dtype=np.float32)\n", - " self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_dim,), dtype=np.float32)\n", - " \n", - " def reset(self):\n", - " return \n", - "\n", - " def step(self, actions):\n", - " return" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "os4C4-4H7ns7" - }, - "source": [ - "## Run Paper trading" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7nw0i-0UN3-7", - "outputId": "25729df7-4775-49af-bf5a-38e3970d0056" - }, - "outputs": [], - "source": [ - "print(DOW_30_TICKER)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YsSBK9ION1t6", - "outputId": "49a69655-850f-436b-a21c-fffe48528e71" - }, - "outputs": [], - "source": [ - "state_dim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "xYtSv6P1N247", - "outputId": "174550ce-664a-41fc-bd89-9d3726960c5b" - }, - "outputs": [], - "source": [ - "action_dim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Kl9nulnAJtiI" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1660 minutes til market open.\n", - "1659 minutes til market open.\n", - "1658 minutes til market open.\n", - "1657 minutes til market open.\n", - "1656 minutes til market open.\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V1ofncK2cYhs" + }, + "source": [ + "Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. 
Please use common sense and always first consult a professional before trading or investing." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yhzqm7zYB1Xg" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j3mbRu3s1YlD" + }, + "source": [ + "# Part 1: Install FinRL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0gkmsPgbvNf6" + }, + "outputs": [], + "source": [ + "## install finrl library\n", + "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3rwy7V72-8YY" + }, + "source": [ + "## Get the API Keys Ready" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8Z6qlLXY-fA2" + }, + "outputs": [], + "source": [ + "API_KEY = 'API_KEY'\n", + "API_SECRET = 'API_SECRET'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "--6Kx8I21erH" + }, + "source": [ + "## Import related modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H7I7zsyYfoLJ" + }, + "outputs": [], + "source": [ + "from finrl.config_tickers import DOW_30_TICKER\n", + "from finrl.config import INDICATORS\n", + "from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0EVJIQUR6_fu" + }, + "source": [ + "## PPO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-EYx40S84tzo" + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import gymnasium as gym\n", + "import numpy as np\n", + "import numpy.random as rd\n", + "import torch\n", + "import torch.nn as nn\n", + "from typing import List, Tuple\n", + "from copy import deepcopy\n", + "from torch import Tensor\n", + "from torch.distributions.normal import Normal\n", + "\n", + "\n", + "class ActorPPO(nn.Module):\n", + " def __init__(self, dims: List[int], state_dim: int, action_dim: int):\n", + " super().__init__()\n", + " self.net = build_mlp(dims=[state_dim, *dims, action_dim])\n", + " self.action_std_log = nn.Parameter(torch.zeros((1, action_dim)), requires_grad=True) # trainable parameter\n", + "\n", + " def forward(self, state: Tensor) -> Tensor:\n", + " return self.net(state).tanh() # action.tanh()\n", + "\n", + " def get_action(self, state: Tensor) -> Tuple[Tensor, Tensor]: # for exploration\n", + " action_avg = self.net(state)\n", + " action_std = self.action_std_log.exp()\n", + "\n", + " dist = Normal(action_avg, action_std)\n", + " action = dist.sample()\n", + " logprob = dist.log_prob(action).sum(1)\n", + " return action, logprob\n", + "\n", + " def get_logprob_entropy(self, state: Tensor, action: Tensor) -> Tuple[Tensor, Tensor]:\n", + " action_avg = self.net(state)\n", + " action_std = self.action_std_log.exp()\n", + "\n", + " dist = Normal(action_avg, action_std)\n", + " logprob = dist.log_prob(action).sum(1)\n", + " entropy = dist.entropy().sum(1)\n", + " return logprob, entropy\n", + "\n", + " @staticmethod\n", + " def convert_action_for_env(action: Tensor) -> Tensor:\n", + " return action.tanh()\n", + "\n", + "\n", + "class CriticPPO(nn.Module):\n", + " def __init__(self, dims: List[int], state_dim: int, _action_dim: int):\n", + " super().__init__()\n", + " self.net = build_mlp(dims=[state_dim, *dims, 1])\n", + "\n", + " def forward(self, state: Tensor) -> Tensor:\n", + " return self.net(state) # 
advantage value\n", + "\n", + "\n", + "def build_mlp(dims: List[int]) -> nn.Sequential: # MLP (MultiLayer Perceptron)\n", + " net_list = []\n", + " for i in range(len(dims) - 1):\n", + " net_list.extend([nn.Linear(dims[i], dims[i + 1]), nn.ReLU()])\n", + " del net_list[-1] # remove the activation of output layer\n", + " return nn.Sequential(*net_list)\n", + "\n", + "\n", + "class Config:\n", + " def __init__(self, agent_class=None, env_class=None, env_args=None):\n", + " self.env_class = env_class # env = env_class(**env_args)\n", + " self.env_args = env_args # env = env_class(**env_args)\n", + "\n", + " if env_args is None: # dummy env_args\n", + " env_args = {'env_name': None, 'state_dim': None, 'action_dim': None, 'if_discrete': None}\n", + " self.env_name = env_args['env_name'] # the name of environment. Be used to set 'cwd'.\n", + " self.state_dim = env_args['state_dim'] # vector dimension (feature number) of state\n", + " self.action_dim = env_args['action_dim'] # vector dimension (feature number) of action\n", + " self.if_discrete = env_args['if_discrete'] # discrete or continuous action space\n", + "\n", + " self.agent_class = agent_class # agent = agent_class(...)\n", + "\n", + " '''Arguments for reward shaping'''\n", + " self.gamma = 0.99 # discount factor of future rewards\n", + " self.reward_scale = 1.0 # an approximate target reward usually be closed to 256\n", + "\n", + " '''Arguments for training'''\n", + " self.gpu_id = int(0) # `int` means the ID of single GPU, -1 means CPU\n", + " self.net_dims = (64, 32) # the middle layer dimension of MLP (MultiLayer Perceptron)\n", + " self.learning_rate = 6e-5 # 2 ** -14 ~= 6e-5\n", + " self.soft_update_tau = 5e-3 # 2 ** -8 ~= 5e-3\n", + " self.batch_size = int(128) # num of transitions sampled from replay buffer.\n", + " self.horizon_len = int(2000) # collect horizon_len step while exploring, then update network\n", + " self.buffer_size = None # ReplayBuffer size. Empty the ReplayBuffer for on-policy.\n", + " self.repeat_times = 8.0 # repeatedly update network using ReplayBuffer to keep critic's loss small\n", + "\n", + " '''Arguments for evaluate'''\n", + " self.cwd = None # current working directory to save model. 
None means set automatically\n", + " self.break_step = +np.inf # break training if 'total_step > break_step'\n", + " self.eval_times = int(32) # number of times that get episodic cumulative return\n", + " self.eval_per_step = int(2e4) # evaluate the agent per training steps\n", + "\n", + " def init_before_training(self):\n", + " if self.cwd is None: # set cwd (current working directory) for saving model\n", + " self.cwd = f'./{self.env_name}_{self.agent_class.__name__[5:]}'\n", + " os.makedirs(self.cwd, exist_ok=True)\n", + "\n", + "\n", + "def get_gym_env_args(env, if_print: bool) -> dict:\n", + " if {'unwrapped', 'observation_space', 'action_space', 'spec'}.issubset(dir(env)): # isinstance(env, gym.Env):\n", + " env_name = env.unwrapped.spec.id\n", + " state_shape = env.observation_space.shape\n", + " state_dim = state_shape[0] if len(state_shape) == 1 else state_shape # sometimes state_dim is a list\n", + "\n", + " if_discrete = isinstance(env.action_space, gym.spaces.Discrete)\n", + " if if_discrete: # make sure it is discrete action space\n", + " action_dim = env.action_space.n\n", + " elif isinstance(env.action_space, gym.spaces.Box): # make sure it is continuous action space\n", + " action_dim = env.action_space.shape[0]\n", + "\n", + " env_args = {'env_name': env_name, 'state_dim': state_dim, 'action_dim': action_dim, 'if_discrete': if_discrete}\n", + " print(f\"env_args = {repr(env_args)}\") if if_print else None\n", + " return env_args\n", + "\n", + "\n", + "def kwargs_filter(function, kwargs: dict) -> dict:\n", + " import inspect\n", + " sign = inspect.signature(function).parameters.values()\n", + " sign = {val.name for val in sign}\n", + " common_args = sign.intersection(kwargs.keys())\n", + " return {key: kwargs[key] for key in common_args} # filtered kwargs\n", + "\n", + "\n", + "def build_env(env_class=None, env_args=None):\n", + " if env_class.__module__ == 'gym.envs.registration': # special rule\n", + " env = env_class(id=env_args['env_name'])\n", + " else:\n", + " env = env_class(**kwargs_filter(env_class.__init__, env_args.copy()))\n", + " for attr_str in ('env_name', 'state_dim', 'action_dim', 'if_discrete'):\n", + " setattr(env, attr_str, env_args[attr_str])\n", + " return env\n", + "\n", + "\n", + "class AgentBase:\n", + " def __init__(self, net_dims: List[int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n", + " self.state_dim = state_dim\n", + " self.action_dim = action_dim\n", + "\n", + " self.gamma = args.gamma\n", + " self.batch_size = args.batch_size\n", + " self.repeat_times = args.repeat_times\n", + " self.reward_scale = args.reward_scale\n", + " self.soft_update_tau = args.soft_update_tau\n", + "\n", + " self.states = None # assert self.states == (1, state_dim)\n", + " self.device = torch.device(f\"cuda:{gpu_id}\" if (torch.cuda.is_available() and (gpu_id >= 0)) else \"cpu\")\n", + "\n", + " act_class = getattr(self, \"act_class\", None)\n", + " cri_class = getattr(self, \"cri_class\", None)\n", + " self.act = self.act_target = act_class(net_dims, state_dim, action_dim).to(self.device)\n", + " self.cri = self.cri_target = cri_class(net_dims, state_dim, action_dim).to(self.device) \\\n", + " if cri_class else self.act\n", + "\n", + " self.act_optimizer = torch.optim.Adam(self.act.parameters(), args.learning_rate)\n", + " self.cri_optimizer = torch.optim.Adam(self.cri.parameters(), args.learning_rate) \\\n", + " if cri_class else self.act_optimizer\n", + "\n", + " self.criterion = torch.nn.SmoothL1Loss()\n", + "\n", + " 
@staticmethod\n", + " def optimizer_update(optimizer, objective: Tensor):\n", + " optimizer.zero_grad()\n", + " objective.backward()\n", + " optimizer.step()\n", + "\n", + " @staticmethod\n", + " def soft_update(target_net: torch.nn.Module, current_net: torch.nn.Module, tau: float):\n", + " for tar, cur in zip(target_net.parameters(), current_net.parameters()):\n", + " tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))\n", + "\n", + "\n", + "class AgentPPO(AgentBase):\n", + " def __init__(self, net_dims: List[int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n", + " self.if_off_policy = False\n", + " self.act_class = getattr(self, \"act_class\", ActorPPO)\n", + " self.cri_class = getattr(self, \"cri_class\", CriticPPO)\n", + " AgentBase.__init__(self, net_dims, state_dim, action_dim, gpu_id, args)\n", + "\n", + " self.ratio_clip = getattr(args, \"ratio_clip\", 0.25) # `ratio.clamp(1 - clip, 1 + clip)`\n", + " self.lambda_gae_adv = getattr(args, \"lambda_gae_adv\", 0.95) # could be 0.80~0.99\n", + " self.lambda_entropy = getattr(args, \"lambda_entropy\", 0.01) # could be 0.00~0.10\n", + " self.lambda_entropy = torch.tensor(self.lambda_entropy, dtype=torch.float32, device=self.device)\n", + "\n", + " def explore_env(self, env, horizon_len: int) -> List[Tensor]:\n", + " states = torch.zeros((horizon_len, self.state_dim), dtype=torch.float32).to(self.device)\n", + " actions = torch.zeros((horizon_len, self.action_dim), dtype=torch.float32).to(self.device)\n", + " logprobs = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n", + " rewards = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n", + " dones = torch.zeros(horizon_len, dtype=torch.bool).to(self.device)\n", + "\n", + " ary_state = self.states[0]\n", + "\n", + " get_action = self.act.get_action\n", + " convert = self.act.convert_action_for_env\n", + " for i in range(horizon_len):\n", + " state = torch.as_tensor(ary_state, dtype=torch.float32, device=self.device)\n", + " action, logprob = [t.squeeze(0) for t in get_action(state.unsqueeze(0))[:2]]\n", + "\n", + " ary_action = convert(action).detach().cpu().numpy()\n", + " ary_state, reward, done, _, _ = env.step(ary_action)\n", + " if done:\n", + " ary_state, _ = env.reset()\n", + "\n", + " states[i] = state\n", + " actions[i] = action\n", + " logprobs[i] = logprob\n", + " rewards[i] = reward\n", + " dones[i] = done\n", + "\n", + " self.states[0] = ary_state\n", + " rewards = (rewards * self.reward_scale).unsqueeze(1)\n", + " undones = (1 - dones.type(torch.float32)).unsqueeze(1)\n", + " return states, actions, logprobs, rewards, undones\n", + "\n", + " def update_net(self, buffer) -> List[float]:\n", + " with torch.no_grad():\n", + " states, actions, logprobs, rewards, undones = buffer\n", + " buffer_size = states.shape[0]\n", + "\n", + " '''get advantages reward_sums'''\n", + " bs = 2 ** 10 # set a smaller 'batch_size' when out of GPU memory.\n", + " values = [self.cri(states[i:i + bs]) for i in range(0, buffer_size, bs)]\n", + " values = torch.cat(values, dim=0).squeeze(1) # values.shape == (buffer_size, )\n", + "\n", + " advantages = self.get_advantages(rewards, undones, values) # advantages.shape == (buffer_size, )\n", + " reward_sums = advantages + values # reward_sums.shape == (buffer_size, )\n", + " del rewards, undones, values\n", + "\n", + " advantages = (advantages - advantages.mean()) / (advantages.std(dim=0) + 1e-5)\n", + " assert logprobs.shape == advantages.shape == reward_sums.shape == (buffer_size,)\n", + 
"\n", + " '''update network'''\n", + " obj_critics = 0.0\n", + " obj_actors = 0.0\n", + "\n", + " update_times = int(buffer_size * self.repeat_times / self.batch_size)\n", + " assert update_times >= 1\n", + " for _ in range(update_times):\n", + " indices = torch.randint(buffer_size, size=(self.batch_size,), requires_grad=False)\n", + " state = states[indices]\n", + " action = actions[indices]\n", + " logprob = logprobs[indices]\n", + " advantage = advantages[indices]\n", + " reward_sum = reward_sums[indices]\n", + "\n", + " value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state\n", + " obj_critic = self.criterion(value, reward_sum)\n", + " self.optimizer_update(self.cri_optimizer, obj_critic)\n", + "\n", + " new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)\n", + " ratio = (new_logprob - logprob.detach()).exp()\n", + " surrogate1 = advantage * ratio\n", + " surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)\n", + " obj_surrogate = torch.min(surrogate1, surrogate2).mean()\n", + "\n", + " obj_actor = obj_surrogate + obj_entropy.mean() * self.lambda_entropy\n", + " self.optimizer_update(self.act_optimizer, -obj_actor)\n", + "\n", + " obj_critics += obj_critic.item()\n", + " obj_actors += obj_actor.item()\n", + " a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1)).mean()\n", + " return obj_critics / update_times, obj_actors / update_times, a_std_log.item()\n", + "\n", + " def get_advantages(self, rewards: Tensor, undones: Tensor, values: Tensor) -> Tensor:\n", + " advantages = torch.empty_like(values) # advantage value\n", + "\n", + " masks = undones * self.gamma\n", + " horizon_len = rewards.shape[0]\n", + "\n", + " next_state = torch.tensor(self.states, dtype=torch.float32).to(self.device)\n", + " next_value = self.cri(next_state).detach()[0, 0]\n", + "\n", + " advantage = 0 # last_gae_lambda\n", + " for t in range(horizon_len - 1, -1, -1):\n", + " delta = rewards[t] + masks[t] * next_value - values[t]\n", + " advantages[t] = advantage = delta + masks[t] * self.lambda_gae_adv * advantage\n", + " next_value = values[t]\n", + " return advantages\n", + "\n", + "\n", + "class PendulumEnv(gym.Wrapper): # a demo of custom gym env\n", + " def __init__(self):\n", + " gym_env_name = \"Pendulum-v1\"\n", + " super().__init__(env=gym.make(gym_env_name))\n", + "\n", + " '''the necessary env information when you design a custom env'''\n", + " self.env_name = gym_env_name # the name of this env.\n", + " self.state_dim = self.observation_space.shape[0] # feature number of state\n", + " self.action_dim = self.action_space.shape[0] # feature number of action\n", + " self.if_discrete = False # discrete action or continuous action\n", + "\n", + " def reset(self) -> np.ndarray: # reset the agent in env\n", + " resetted_env, _ = self.env.reset()\n", + " return resetted_env\n", + "\n", + " def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: # agent interacts in env\n", + " # We suggest that adjust action space to (-1, +1) when designing a custom env.\n", + " state, reward, done, info_dict, _ = self.env.step(action * 2)\n", + " return state.reshape(self.state_dim), float(reward), done, info_dict\n", + "\n", + "\n", + "def train_agent(args: Config):\n", + " args.init_before_training()\n", + "\n", + " env = build_env(args.env_class, args.env_args)\n", + " agent = args.agent_class(args.net_dims, args.state_dim, args.action_dim, gpu_id=args.gpu_id, args=args)\n", + "\n", + " new_env, _ = 
env.reset()\n", + " agent.states = new_env[np.newaxis, :]\n", + "\n", + " evaluator = Evaluator(eval_env=build_env(args.env_class, args.env_args),\n", + " eval_per_step=args.eval_per_step,\n", + " eval_times=args.eval_times,\n", + " cwd=args.cwd)\n", + " torch.set_grad_enabled(False)\n", + " while True: # start training\n", + " buffer_items = agent.explore_env(env, args.horizon_len)\n", + "\n", + " torch.set_grad_enabled(True)\n", + " logging_tuple = agent.update_net(buffer_items)\n", + " torch.set_grad_enabled(False)\n", + "\n", + " evaluator.evaluate_and_save(agent.act, args.horizon_len, logging_tuple)\n", + " if (evaluator.total_step > args.break_step) or os.path.exists(f\"{args.cwd}/stop\"):\n", + " torch.save(agent.act.state_dict(), args.cwd + '/actor.pth')\n", + " break # stop training when reach `break_step` or `mkdir cwd/stop`\n", + "\n", + "\n", + "def render_agent(env_class, env_args: dict, net_dims: List[int], agent_class, actor_path: str, render_times: int = 8):\n", + " env = build_env(env_class, env_args)\n", + "\n", + " state_dim = env_args['state_dim']\n", + " action_dim = env_args['action_dim']\n", + " agent = agent_class(net_dims, state_dim, action_dim, gpu_id=-1)\n", + " actor = agent.act\n", + "\n", + " print(f\"| render and load actor from: {actor_path}\")\n", + " actor.load_state_dict(torch.load(actor_path, map_location=lambda storage, loc: storage))\n", + " for i in range(render_times):\n", + " cumulative_reward, episode_step = get_rewards_and_steps(env, actor, if_render=True)\n", + " print(f\"|{i:4} cumulative_reward {cumulative_reward:9.3f} episode_step {episode_step:5.0f}\")\n", + "\n", + "\n", + "class Evaluator:\n", + " def __init__(self, eval_env, eval_per_step: int = 1e4, eval_times: int = 8, cwd: str = '.'):\n", + " self.cwd = cwd\n", + " self.env_eval = eval_env\n", + " self.eval_step = 0\n", + " self.total_step = 0\n", + " self.start_time = time.time()\n", + " self.eval_times = eval_times # number of times that get episodic cumulative return\n", + " self.eval_per_step = eval_per_step # evaluate the agent per training steps\n", + "\n", + " self.recorder = []\n", + " print(f\"\\n| `step`: Number of samples, or total training steps, or running times of `env.step()`.\"\n", + " f\"\\n| `time`: Time spent from the start of training to this moment.\"\n", + " f\"\\n| `avgR`: Average value of cumulative rewards, which is the sum of rewards in an episode.\"\n", + " f\"\\n| `stdR`: Standard dev of cumulative rewards, which is the sum of rewards in an episode.\"\n", + " f\"\\n| `avgS`: Average of steps in an episode.\"\n", + " f\"\\n| `objC`: Objective of Critic network. Or call it loss function of critic network.\"\n", + " f\"\\n| `objA`: Objective of Actor network. 
It is the average Q value of the critic network.\"\n", + " f\"\\n| {'step':>8} {'time':>8} | {'avgR':>8} {'stdR':>6} {'avgS':>6} | {'objC':>8} {'objA':>8}\")\n", + "\n", + " def evaluate_and_save(self, actor, horizon_len: int, logging_tuple: tuple):\n", + " self.total_step += horizon_len\n", + " if self.eval_step + self.eval_per_step > self.total_step:\n", + " return\n", + " self.eval_step = self.total_step\n", + "\n", + " rewards_steps_ary = [get_rewards_and_steps(self.env_eval, actor) for _ in range(self.eval_times)]\n", + " rewards_steps_ary = np.array(rewards_steps_ary, dtype=np.float32)\n", + " avg_r = rewards_steps_ary[:, 0].mean() # average of cumulative rewards\n", + " std_r = rewards_steps_ary[:, 0].std() # std of cumulative rewards\n", + " avg_s = rewards_steps_ary[:, 1].mean() # average of steps in an episode\n", + "\n", + " used_time = time.time() - self.start_time\n", + " self.recorder.append((self.total_step, used_time, avg_r))\n", + "\n", + " print(f\"| {self.total_step:8.2e} {used_time:8.0f} \"\n", + " f\"| {avg_r:8.2f} {std_r:6.2f} {avg_s:6.0f} \"\n", + " f\"| {logging_tuple[0]:8.2f} {logging_tuple[1]:8.2f}\")\n", + "\n", + "\n", + "def get_rewards_and_steps(env, actor, if_render: bool = False) -> Tuple[float, int]: # cumulative_rewards and episode_steps\n", + " device = next(actor.parameters()).device # net.parameters() is a Python generator.\n", + "\n", + " state, _ = env.reset()\n", + " episode_steps = 0\n", + " cumulative_returns = 0.0 # sum of rewards in an episode\n", + " for episode_steps in range(12345):\n", + " tensor_state = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)\n", + " tensor_action = actor(tensor_state)\n", + " action = tensor_action.detach().cpu().numpy()[0] # not need detach(), because using torch.no_grad() outside\n", + " state, reward, done, _, _ = env.step(action)\n", + " cumulative_returns += reward\n", + "\n", + " if if_render:\n", + " env.render()\n", + " if done:\n", + " break\n", + " return cumulative_returns, episode_steps + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9tzAw9k26nAC" + }, + "source": [ + "## DRL Agent Class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pwCbbocm6PHM" + }, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import torch\n", + "# from elegantrl.agents import AgentA2C\n", + "\n", + "MODELS = {\"ppo\": AgentPPO}\n", + "OFF_POLICY_MODELS = [\"ddpg\", \"td3\", \"sac\"]\n", + "ON_POLICY_MODELS = [\"ppo\"]\n", + "# MODEL_KWARGS = {x: config.__dict__[f\"{x.upper()}_PARAMS\"] for x in MODELS.keys()}\n", + "#\n", + "# NOISE = {\n", + "# \"normal\": NormalActionNoise,\n", + "# \"ornstein_uhlenbeck\": OrnsteinUhlenbeckActionNoise,\n", + "# }\n", + "\n", + "\n", + "class DRLAgent:\n", + " \"\"\"Implementations of DRL algorithms\n", + " Attributes\n", + " ----------\n", + " env: gym environment class\n", + " user-defined class\n", + " Methods\n", + " -------\n", + " get_model()\n", + " setup DRL algorithms\n", + " train_model()\n", + " train DRL algorithms in a train dataset\n", + " and output the trained model\n", + " DRL_prediction()\n", + " make a prediction in a test dataset and get results\n", + " \"\"\"\n", + "\n", + " def __init__(self, env, price_array, tech_array, turbulence_array):\n", + " self.env = env\n", + " self.price_array = price_array\n", + " self.tech_array = tech_array\n", + " self.turbulence_array = turbulence_array\n", + "\n", + " def get_model(self, model_name, model_kwargs):\n", + 
" env_config = {\n", + " \"price_array\": self.price_array,\n", + " \"tech_array\": self.tech_array,\n", + " \"turbulence_array\": self.turbulence_array,\n", + " \"if_train\": True,\n", + " }\n", + " environment = self.env(config=env_config)\n", + " env_args = {'config': env_config,\n", + " 'env_name': environment.env_name,\n", + " 'state_dim': environment.state_dim,\n", + " 'action_dim': environment.action_dim,\n", + " 'if_discrete': False}\n", + " agent = MODELS[model_name]\n", + " if model_name not in MODELS:\n", + " raise NotImplementedError(\"NotImplementedError\")\n", + " model = Config(agent_class=agent, env_class=self.env, env_args=env_args)\n", + " model.if_off_policy = model_name in OFF_POLICY_MODELS\n", + " if model_kwargs is not None:\n", + " try:\n", + " model.learning_rate = model_kwargs[\"learning_rate\"]\n", + " model.batch_size = model_kwargs[\"batch_size\"]\n", + " model.gamma = model_kwargs[\"gamma\"]\n", + " model.seed = model_kwargs[\"seed\"]\n", + " model.net_dims = model_kwargs[\"net_dimension\"]\n", + " model.target_step = model_kwargs[\"target_step\"]\n", + " model.eval_gap = model_kwargs[\"eval_gap\"]\n", + " model.eval_times = model_kwargs[\"eval_times\"]\n", + " except BaseException:\n", + " raise ValueError(\n", + " \"Fail to read arguments, please check 'model_kwargs' input.\"\n", + " )\n", + " return model\n", + "\n", + " def train_model(self, model, cwd, total_timesteps=5000):\n", + " model.cwd = cwd\n", + " model.break_step = total_timesteps\n", + " train_agent(model)\n", + "\n", + " @staticmethod\n", + " def DRL_prediction(model_name, cwd, net_dimension, environment):\n", + " if model_name not in MODELS:\n", + " raise NotImplementedError(\"NotImplementedError\")\n", + " agent_class = MODELS[model_name]\n", + " environment.env_num = 1\n", + " agent = agent_class(net_dimension, environment.state_dim, environment.action_dim)\n", + " actor = agent.act\n", + " # load agent\n", + " try:\n", + " cwd = cwd + '/actor.pth'\n", + " print(f\"| load actor from: {cwd}\")\n", + " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n", + " act = actor\n", + " device = agent.device\n", + " except BaseException:\n", + " raise ValueError(\"Fail to load agent!\")\n", + "\n", + " # test on the testing env\n", + " _torch = torch\n", + " state, _ = environment.reset()\n", + " episode_returns = [] # the cumulative_return / initial_account\n", + " episode_total_assets = [environment.initial_total_asset]\n", + " with _torch.no_grad():\n", + " for i in range(environment.max_step):\n", + " s_tensor = _torch.as_tensor((state,), device=device)\n", + " a_tensor = act(s_tensor) # action_tanh = act.forward()\n", + " action = (\n", + " a_tensor.detach().cpu().numpy()[0]\n", + " ) # not need detach(), because with torch.no_grad() outside\n", + " state, reward, done, _, _ = environment.step(action)\n", + "\n", + " total_asset = (\n", + " environment.amount\n", + " + (\n", + " environment.price_ary[environment.day] * environment.stocks\n", + " ).sum()\n", + " )\n", + " episode_total_assets.append(total_asset)\n", + " episode_return = total_asset / environment.initial_total_asset\n", + " episode_returns.append(episode_return)\n", + " if done:\n", + " break\n", + " print(\"Test Finished!\")\n", + " # return episode total_assets on testing data\n", + " print(\"episode_return\", episode_return)\n", + " return episode_total_assets\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zjLda8No6pvI" + }, + "source": [ + "## Train & Test Functions" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j8-e03ev32oz" + }, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "from finrl.meta.data_processor import DataProcessor\n", + "\n", + "def train(\n", + " start_date,\n", + " end_date,\n", + " ticker_list,\n", + " data_source,\n", + " time_interval,\n", + " technical_indicator_list,\n", + " drl_lib,\n", + " env,\n", + " model_name,\n", + " if_vix=True,\n", + " **kwargs,\n", + "):\n", + " # download data\n", + " dp = DataProcessor(data_source, **kwargs)\n", + " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n", + " data = dp.clean_data(data)\n", + " data = dp.add_technical_indicator(data, technical_indicator_list)\n", + " if if_vix:\n", + " data = dp.add_vix(data)\n", + " else:\n", + " data = dp.add_turbulence(data)\n", + " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n", + " env_config = {\n", + " \"price_array\": price_array,\n", + " \"tech_array\": tech_array,\n", + " \"turbulence_array\": turbulence_array,\n", + " \"if_train\": True,\n", + " }\n", + " env_instance = env(config=env_config)\n", + "\n", + " # read parameters\n", + " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n", + "\n", + " if drl_lib == \"elegantrl\":\n", + " DRLAgent_erl = DRLAgent\n", + " break_step = kwargs.get(\"break_step\", 1e6)\n", + " erl_params = kwargs.get(\"erl_params\")\n", + " agent = DRLAgent_erl(\n", + " env=env,\n", + " price_array=price_array,\n", + " tech_array=tech_array,\n", + " turbulence_array=turbulence_array,\n", + " )\n", + " model = agent.get_model(model_name, model_kwargs=erl_params)\n", + " trained_model = agent.train_model(\n", + " model=model, cwd=cwd, total_timesteps=break_step\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Evsg8QtEDHDO" + }, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "from finrl.config import INDICATORS\n", + "from finrl.config import RLlib_PARAMS\n", + "from finrl.config import TEST_END_DATE\n", + "from finrl.config import TEST_START_DATE\n", + "from finrl.config_tickers import DOW_30_TICKER\n", + "\n", + "def test(\n", + " start_date,\n", + " end_date,\n", + " ticker_list,\n", + " data_source,\n", + " time_interval,\n", + " technical_indicator_list,\n", + " drl_lib,\n", + " env,\n", + " model_name,\n", + " if_vix=True,\n", + " **kwargs,\n", + "):\n", + "\n", + " # import data processor\n", + " from finrl.meta.data_processor import DataProcessor\n", + "\n", + " # fetch data\n", + " dp = DataProcessor(data_source, **kwargs)\n", + " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n", + " data = dp.clean_data(data)\n", + " data = dp.add_technical_indicator(data, technical_indicator_list)\n", + "\n", + " if if_vix:\n", + " data = dp.add_vix(data)\n", + " else:\n", + " data = dp.add_turbulence(data)\n", + " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n", + "\n", + " env_config = {\n", + " \"price_array\": price_array,\n", + " \"tech_array\": tech_array,\n", + " \"turbulence_array\": turbulence_array,\n", + " \"if_train\": False,\n", + " }\n", + " env_instance = env(config=env_config)\n", + "\n", + " # load elegantrl needs state dim, action dim and net dim\n", + " net_dimension = kwargs.get(\"net_dimension\", 2**7)\n", + " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n", + " print(\"price_array: \", len(price_array))\n", + "\n", + " if drl_lib == 
\"elegantrl\":\n", + " DRLAgent_erl = DRLAgent\n", + " episode_total_assets = DRLAgent_erl.DRL_prediction(\n", + " model_name=model_name,\n", + " cwd=cwd,\n", + " net_dimension=net_dimension,\n", + " environment=env_instance,\n", + " )\n", + " return episode_total_assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pf5aVHAU-xF6" + }, + "source": [ + "## Import Dow Jones 30 Symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jx25TA_X87F-" + }, + "outputs": [], + "source": [ + "ticker_list = DOW_30_TICKER\n", + "action_dim = len(DOW_30_TICKER)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UIV0kO_y-inG" + }, + "outputs": [], + "source": [ + "print(ticker_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CnqQ-cC5-rfO" + }, + "outputs": [], + "source": [ + "print(INDICATORS)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rZMkcyjZ-25l" + }, + "source": [ + "## Calculate the DRL state dimension manually for paper trading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GLfkTsXK-e90" + }, + "outputs": [], + "source": [ + "# amount + (turbulence, turbulence_bool) + (price, shares, cd (holding time)) * stock_dim + tech_dim\n", + "state_dim = 1 + 2 + 3 * action_dim + len(INDICATORS) * action_dim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QqUkvImG-n66" + }, + "outputs": [], + "source": [ + "state_dim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vdaI0oJ9B1Xk" + }, + "outputs": [], + "source": [ + "env = StockTradingEnv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J25MuZLiGqCP" + }, + "source": [ + "## Show the data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "puJZWm8NHtSN" + }, + "source": [ + "### Step 1. Pick a data source" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3ZCru8f7GqgL" + }, + "outputs": [], + "source": [ + "# DP = DataProcessor(data_source = 'alpaca',\n", + "# API_KEY = API_KEY,\n", + "# API_SECRET = API_SECRET,\n", + "# API_BASE_URL = API_BASE_URL\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nvPEW2mYHvkR" + }, + "source": [ + "### Step 2. Get ticker list, Set start date and end date, specify the data frequency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NPNxj6c8HIiE" + }, + "outputs": [], + "source": [ + "# data = DP.download_data(start_date = '2021-10-04',\n", + "# end_date = '2021-10-08',\n", + "# ticker_list = ticker_list,\n", + "# time_interval= '1Min')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pPcazCq1d5ec" + }, + "outputs": [], + "source": [ + "# data['timestamp'].nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i46jGdE0IAel" + }, + "source": [ + "### Step 3. 
Data Cleaning & Feature Engineering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x9euUsEPHWFK" + }, + "outputs": [], + "source": [ + "# data = DP.clean_data(data)\n", + "# data = DP.add_technical_indicator(data, INDICATORS)\n", + "# data = DP.add_vix(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GOcPTaAgHdxa" + }, + "outputs": [], + "source": [ + "# data.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bbu03L_UIMWt" + }, + "source": [ + "### Step 4. Transform to numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rzj0vjZZHdGM" + }, + "outputs": [], + "source": [ + "# price_array, tech_array, turbulence_array = DP.df_to_array(data, if_vix=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eW0UDAXI1nEa" + }, + "source": [ + "# Part 2: Train the agent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lArLOFcJ7VMO" + }, + "source": [ + "## Train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g1F84mebj4gu" + }, + "outputs": [], + "source": [ + "ERL_PARAMS = {\"learning_rate\": 3e-6,\"batch_size\": 2048,\"gamma\": 0.985,\n", + " \"seed\":312,\"net_dimension\":[128,64], \"target_step\":5000, \"eval_gap\":30,\n", + " \"eval_times\":1}\n", + "env = StockTradingEnv\n", + "# if you want to use larger datasets (change to longer period), and it raises error,\n", + "# please try to increase \"target_step\". It should be larger than the episode steps." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BxcNI2fdNjip" + }, + "outputs": [], + "source": [ + "train(start_date = '2022-08-25',\n", + " end_date = '2022-08-31',\n", + " ticker_list = ticker_list,\n", + " data_source = 'alpaca',\n", + " time_interval= '1Min',\n", + " technical_indicator_list= INDICATORS,\n", + " drl_lib='elegantrl',\n", + " env=env,\n", + " model_name='ppo',\n", + " if_vix=True,\n", + " API_KEY = API_KEY,\n", + " API_SECRET = API_SECRET,\n", + " erl_params=ERL_PARAMS,\n", + " cwd='./papertrading_erl', #current_working_dir\n", + " break_step=1e5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g37WugV_1pAS" + }, + "source": [ + "## Test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SxYoWCDa02TW" + }, + "outputs": [], + "source": [ + "account_value_erl=test(start_date = '2022-09-01',\n", + " end_date = '2022-09-02',\n", + " ticker_list = ticker_list,\n", + " data_source = 'alpaca',\n", + " time_interval= '1Min',\n", + " technical_indicator_list= INDICATORS,\n", + " drl_lib='elegantrl',\n", + " env=env,\n", + " model_name='ppo',\n", + " if_vix=True,\n", + " API_KEY = API_KEY,\n", + " API_SECRET = API_SECRET,\n", + " cwd='./papertrading_erl',\n", + " net_dimension = ERL_PARAMS['net_dimension'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e8aNQ58X7avM" + }, + "source": [ + "## Use full data to train" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3CQ9_Yv41r88" + }, + "source": [ + "After tuning well, retrain on the training and testing sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cUSgbwt_10V3" + }, + "outputs": [], + "source": [ + "train(start_date = '2022-08-25',\n", + " end_date = '2022-09-02',\n", + " ticker_list = ticker_list,\n", + " data_source = 'alpaca',\n", + " time_interval= '1Min',\n", + " 
technical_indicator_list= INDICATORS,\n", + " drl_lib='elegantrl',\n", + " env=env,\n", + " model_name='ppo',\n", + " if_vix=True,\n", + " API_KEY = API_KEY,\n", + " API_SECRET = API_SECRET,\n", + " erl_params=ERL_PARAMS,\n", + " cwd='./papertrading_erl_retrain',\n", + " break_step=2e5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sIQN6Ggt7gXY" + }, + "source": [ + "# Part 3: Deploy the agent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UFoxkigg1zXa" + }, + "source": [ + "## Setup Alpaca Paper trading environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LpkoZpYzfneS" + }, + "outputs": [], + "source": [ + "import datetime\n", + "import threading\n", + "from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor\n", + "from alpaca.trading.client import TradingClient\n", + "from alpaca.trading.requests import GetOrdersRequest\n", + "from alpaca.trading.enums import OrderSide, QueryOrderStatus\n", + "import time\n", + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "import gymnasium as gym\n", + "\n", + "class AlpacaPaperTrading():\n", + "\n", + " def __init__(self,ticker_list, time_interval, drl_lib, agent, cwd, net_dim,\n", + " state_dim, action_dim, API_KEY, API_SECRET,\n", + " tech_indicator_list, turbulence_thresh=30,\n", + " max_stock=1e2, latency = None):\n", + " #load agent\n", + " self.drl_lib = drl_lib\n", + " if agent =='ppo':\n", + " if drl_lib == 'elegantrl':\n", + " agent_class = AgentPPO\n", + " agent = agent_class(net_dim, state_dim, action_dim)\n", + " actor = agent.act\n", + " # load agent\n", + " try:\n", + " cwd = cwd + '/actor.pth'\n", + " print(f\"| load actor from: {cwd}\")\n", + " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n", + " self.act = actor\n", + " self.device = agent.device\n", + " except BaseException:\n", + " raise ValueError(\"Fail to load agent!\")\n", + "\n", + " elif drl_lib == 'rllib':\n", + " from ray.rllib.agents import ppo\n", + " from ray.rllib.agents.ppo.ppo import PPOTrainer\n", + "\n", + " config = ppo.DEFAULT_CONFIG.copy()\n", + " config['env'] = StockEnvEmpty\n", + " config[\"log_level\"] = \"WARN\"\n", + " config['env_config'] = {'state_dim':state_dim,\n", + " 'action_dim':action_dim,}\n", + " trainer = PPOTrainer(env=StockEnvEmpty, config=config)\n", + " trainer.restore(cwd)\n", + " try:\n", + " trainer.restore(cwd)\n", + " self.agent = trainer\n", + " print(\"Restoring from checkpoint path\", cwd)\n", + " except:\n", + " raise ValueError('Fail to load agent!')\n", + "\n", + " elif drl_lib == 'stable_baselines3':\n", + " from stable_baselines3 import PPO\n", + "\n", + " try:\n", + " #load agent\n", + " self.model = PPO.load(cwd)\n", + " print(\"Successfully load model\", cwd)\n", + " except:\n", + " raise ValueError('Fail to load agent!')\n", + "\n", + " else:\n", + " raise ValueError('The DRL library input is NOT supported yet. Please check your input.')\n", + "\n", + " else:\n", + " raise ValueError('Agent input is NOT supported yet.')\n", + "\n", + "\n", + "\n", + " #connect to Alpaca trading API\n", + " try:\n", + " self.alpaca = TradingClient(\n", + " api_key=API_KEY, secret_key=API_SECRET, paper=True\n", + " )\n", + " except:\n", + " raise ValueError('Fail to connect Alpaca. 
Please check account info and internet connection.')\n",
+    "\n",
+    "        #read trading time interval\n",
+    "        if time_interval == '1s':\n",
+    "            self.time_interval = 1\n",
+    "        elif time_interval == '5s':\n",
+    "            self.time_interval = 5\n",
+    "        elif time_interval == '1Min':\n",
+    "            self.time_interval = 60\n",
+    "        elif time_interval == '5Min':\n",
+    "            self.time_interval = 60 * 5\n",
+    "        elif time_interval == '15Min':\n",
+    "            self.time_interval = 60 * 15\n",
+    "        else:\n",
+    "            raise ValueError('Time interval input is NOT supported yet.')\n",
+    "\n",
+    "        #read trading settings\n",
+    "        self.tech_indicator_list = tech_indicator_list\n",
+    "        self.turbulence_thresh = turbulence_thresh\n",
+    "        self.max_stock = max_stock\n",
+    "\n",
+    "        #initialize account\n",
+    "        self.stocks = np.asarray([0] * len(ticker_list)) #stocks holding\n",
+    "        self.stocks_cd = np.zeros_like(self.stocks)\n",
+    "        self.cash = None #cash record\n",
+    "        self.stocks_df = pd.DataFrame(self.stocks, columns=['stocks'], index = ticker_list)\n",
+    "        self.asset_list = []\n",
+    "        self.price = np.asarray([0] * len(ticker_list))\n",
+    "        self.stockUniverse = ticker_list\n",
+    "        self.turbulence_bool = 0\n",
+    "        self.equities = []\n",
+    "\n",
+    "    def test_latency(self, test_times = 10):\n",
+    "        total_time = 0\n",
+    "        for i in range(0, test_times):\n",
+    "            time0 = time.time()\n",
+    "            self.get_state()\n",
+    "            time1 = time.time()\n",
+    "            temp_time = time1 - time0\n",
+    "            total_time += temp_time\n",
+    "        latency = total_time/test_times\n",
+    "        print('latency for data processing: ', latency)\n",
+    "        return latency\n",
+    "\n",
+    "    def run(self):\n",
+    "        # params to filter orders by\n",
+    "        request_params = GetOrdersRequest(\n",
+    "            status=QueryOrderStatus.OPEN\n",
+    "        )\n",
+    "\n",
+    "        # orders that satisfy params\n",
+    "        orders = self.alpaca.get_orders(filter=request_params)\n",
+    "        for order in orders:\n",
+    "            # alpaca-py cancels open orders individually by id\n",
+    "            self.alpaca.cancel_order_by_id(order.id)\n",
+    "\n",
+    "        # Wait for market to open.\n",
+    "        print(\"Waiting for market to open...\")\n",
+    "        self.awaitMarketOpen()\n",
+    "        print(\"Market opened.\")\n",
+    "\n",
+    "        while True:\n",
+    "\n",
+    "            # Figure out when the market will close so we can prepare to sell beforehand.\n",
+    "            clock = self.alpaca.get_clock()\n",
+    "            closingTime = clock.next_close.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
+    "            currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
+    "            self.timeToClose = closingTime - currTime\n",
+    "\n",
+    "            if(self.timeToClose < (60)):\n",
+    "                # Stop trading when less than 1 minute remains until market close.\n",
+    "                print(\"Market closing soon. Stop trading.\")\n",
+    "                break\n",
+    "\n",
+    "            '''# Close all positions when 1 minute til market close.\n",
+    "            print(\"Market closing soon. 
Closing positions.\")\n", + "\n", + " threads = []\n", + " positions = self.alpaca.list_positions()\n", + " for position in positions:\n", + " if(position.side == 'long'):\n", + " orderSide = 'sell'\n", + " else:\n", + " orderSide = 'buy'\n", + " qty = abs(int(float(position.qty)))\n", + " respSO = []\n", + " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n", + " tSubmitOrder.start()\n", + " threads.append(tSubmitOrder) # record thread for joining later\n", + "\n", + " for x in threads: # wait for all threads to complete\n", + " x.join()\n", + " # Run script again after market close for next trading day.\n", + " print(\"Sleeping until market close (15 minutes).\")\n", + " time.sleep(60 * 15)'''\n", + "\n", + " else:\n", + " self.trade()\n", + " last_equity = float(self.alpaca.get_account().last_equity)\n", + " cur_time = time.time()\n", + " self.equities.append([cur_time,last_equity])\n", + " time.sleep(self.time_interval)\n", + "\n", + " def awaitMarketOpen(self):\n", + " isOpen = self.alpaca.get_clock().is_open\n", + " while(not isOpen):\n", + " clock = self.alpaca.get_clock()\n", + " openingTime = clock.next_open.replace(tzinfo=datetime.timezone.utc).timestamp()\n", + " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n", + " timeToOpen = int((openingTime - currTime) / 60)\n", + " print(str(timeToOpen) + \" minutes til market open.\")\n", + " time.sleep(60)\n", + " isOpen = self.alpaca.get_clock().is_open\n", + "\n", + " def trade(self):\n", + " state = self.get_state()\n", + "\n", + " if self.drl_lib == 'elegantrl':\n", + " with torch.no_grad():\n", + " s_tensor = torch.as_tensor((state,), device=self.device)\n", + " a_tensor = self.act(s_tensor)\n", + " action = a_tensor.detach().cpu().numpy()[0]\n", + " action = (action * self.max_stock).astype(int)\n", + "\n", + " elif self.drl_lib == 'rllib':\n", + " action = self.agent.compute_single_action(state)\n", + "\n", + " elif self.drl_lib == 'stable_baselines3':\n", + " action = self.model.predict(state)[0]\n", + "\n", + " else:\n", + " raise ValueError('The DRL library input is NOT supported yet. 
Please check your input.')\n",
+    "\n",
+    "        self.stocks_cd += 1\n",
+    "        if self.turbulence_bool == 0:\n",
+    "            min_action = 10  # ignore weak signals: trade only when |action| > 10 shares\n",
+    "            threads = []\n",
+    "            for index in np.where(action < -min_action)[0]:  # sell_index:\n",
+    "                sell_num_shares = min(self.stocks[index], -action[index])\n",
+    "                qty = abs(int(sell_num_shares))\n",
+    "                respSO = []\n",
+    "                tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'sell', respSO))\n",
+    "                tSubmitOrder.start()\n",
+    "                threads.append(tSubmitOrder)  # record thread for joining later\n",
+    "                self.cash = float(self.alpaca.get_account().cash)\n",
+    "                self.stocks_cd[index] = 0\n",
+    "\n",
+    "            for x in threads:  # wait for all threads to complete\n",
+    "                x.join()\n",
+    "\n",
+    "            threads = []\n",
+    "            for index in np.where(action > min_action)[0]:  # buy_index:\n",
+    "                if self.cash < 0:\n",
+    "                    tmp_cash = 0\n",
+    "                else:\n",
+    "                    tmp_cash = self.cash\n",
+    "                buy_num_shares = min(tmp_cash // self.price[index], abs(int(action[index])))\n",
+    "                if (buy_num_shares != buy_num_shares):  # NaN check: only NaN is unequal to itself\n",
+    "                    qty = 0  # set to 0 quantity\n",
+    "                else:\n",
+    "                    qty = abs(int(buy_num_shares))\n",
+    "                respSO = []\n",
+    "                tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'buy', respSO))\n",
+    "                tSubmitOrder.start()\n",
+    "                threads.append(tSubmitOrder)  # record thread for joining later\n",
+    "                self.cash = float(self.alpaca.get_account().cash)\n",
+    "                self.stocks_cd[index] = 0\n",
+    "\n",
+    "            for x in threads:  # wait for all threads to complete\n",
+    "                x.join()\n",
+    "\n",
+    "        else:  # sell all when turbulence\n",
+    "            threads = []\n",
+    "            positions = self.alpaca.get_all_positions()\n",
+    "            for position in positions:\n",
+    "                if(position.side == 'long'):\n",
+    "                    orderSide = 'sell'\n",
+    "                else:\n",
+    "                    orderSide = 'buy'\n",
+    "                qty = abs(int(float(position.qty)))\n",
+    "                respSO = []\n",
+    "                tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n",
+    "                tSubmitOrder.start()\n",
+    "                threads.append(tSubmitOrder)  # record thread for joining later\n",
+    "\n",
+    "            for x in threads:  # wait for all threads to complete\n",
+    "                x.join()\n",
+    "\n",
+    "            self.stocks_cd[:] = 0\n",
+    "\n",
+    "    def get_state(self):\n",
+    "        # the processor takes its client via the 'client' keyword; whether the\n",
+    "        # TradingClient is the right client for data fetching depends on the\n",
+    "        # processor implementation, so verify against processor_alpaca.py\n",
+    "        alpaca = AlpacaProcessor(client=self.alpaca)\n",
+    "        price, tech, turbulence = alpaca.fetch_latest_data(ticker_list = self.stockUniverse, time_interval='1Min',\n",
+    "                                                           tech_indicator_list=self.tech_indicator_list)\n",
+    "        turbulence_bool = 1 if turbulence >= self.turbulence_thresh else 0\n",
+    "\n",
+    "        turbulence = (self.sigmoid_sign(turbulence, self.turbulence_thresh) * 2 ** -5).astype(np.float32)\n",
+    "\n",
+    "        tech = tech * 2 ** -7\n",
+    "        positions = self.alpaca.get_all_positions()\n",
+    "        stocks = [0] * len(self.stockUniverse)\n",
+    "        for position in positions:\n",
+    "            ind = self.stockUniverse.index(position.symbol)\n",
+    "            stocks[ind] = abs(int(float(position.qty)))\n",
+    "\n",
+    "        stocks = np.asarray(stocks, dtype = float)\n",
+    "        cash = float(self.alpaca.get_account().cash)\n",
+    "        self.cash = cash\n",
+    "        self.stocks = stocks\n",
+    "        self.turbulence_bool = turbulence_bool\n",
+    "        self.price = price\n",
+    "\n",
+    "        amount = np.array(self.cash * (2 ** -12), dtype=np.float32)\n",
+    "        scale = np.array(2 ** -6, dtype=np.float32)\n",
+    "        state = np.hstack((amount,\n",
+    "                           turbulence,\n",
+    "                           self.turbulence_bool,\n",
+    "                           price * scale,\n",
+    "                           self.stocks * scale,\n",
+    "                           self.stocks_cd,\n",
+    "                           tech,\n",
+    "                           )).astype(np.float32)\n",
+    "        state[np.isnan(state)] = 0.0\n",
+    "        state[np.isinf(state)] = 0.0\n",
+    "        # print(len(self.stockUniverse))  # debug\n",
+    "        return state\n",
+    "\n",
+    "    def submitOrder(self, qty, stock, side, resp):\n",
+    "        if(qty > 0):\n",
+    "            try:\n",
+    "                # alpaca-py submits orders as request objects instead of\n",
+    "                # positional arguments\n",
+    "                from alpaca.trading.requests import MarketOrderRequest\n",
+    "                from alpaca.trading.enums import TimeInForce\n",
+    "                order_data = MarketOrderRequest(\n",
+    "                    symbol=stock,\n",
+    "                    qty=qty,\n",
+    "                    side=OrderSide.BUY if side == 'buy' else OrderSide.SELL,\n",
+    "                    time_in_force=TimeInForce.DAY,\n",
+    "                )\n",
+    "                self.alpaca.submit_order(order_data=order_data)\n",
+    "                print(\"Market order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | completed.\")\n",
+    "                resp.append(True)\n",
+    "            except:\n",
+    "                print(\"Order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | did not go through.\")\n",
+    "                resp.append(False)\n",
+    "        else:\n",
+    "            print(\"Quantity is 0, order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | not completed.\")\n",
+    "            resp.append(True)\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def sigmoid_sign(ary, thresh):\n",
+    "        def sigmoid(x):\n",
+    "            return 1 / (1 + np.exp(-x * np.e)) - 0.5\n",
+    "\n",
+    "        return sigmoid(ary / thresh) * thresh\n",
+    "\n",
+    "class StockEnvEmpty(gym.Env):\n",
+    "    #Empty Env used for loading rllib agent\n",
+    "    def __init__(self,config):\n",
+    "        state_dim = config['state_dim']\n",
+    "        action_dim = config['action_dim']\n",
+    "        self.env_num = 1\n",
+    "        self.max_step = 10000\n",
+    "        self.env_name = 'StockEnvEmpty'\n",
+    "        self.state_dim = state_dim\n",
+    "        self.action_dim = action_dim\n",
+    "        self.if_discrete = False\n",
+    "        self.target_return = 9999\n",
+    "        self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(state_dim,), dtype=np.float32)\n",
+    "        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_dim,), dtype=np.float32)\n",
+    "\n",
+    "    def reset(self):\n",
+    "        return\n",
+    "\n",
+    "    def step(self, actions):\n",
+    "        return"
+   ]
+  },
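+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional, editor-added illustration of the alpaca-py order pattern used by `submitOrder` above: a `MarketOrderRequest` is built locally (no network call) and would only reach the paper account through `TradingClient.submit_order(...)`, which is left commented out. Ticker and quantity are placeholders."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from alpaca.trading.requests import MarketOrderRequest\n",
+    "from alpaca.trading.enums import OrderSide, TimeInForce\n",
+    "\n",
+    "# Build (but do not send) a one-share market order, as submitOrder does.\n",
+    "demo_order = MarketOrderRequest(symbol='AAPL', qty=1,\n",
+    "                                side=OrderSide.BUY,\n",
+    "                                time_in_force=TimeInForce.DAY)\n",
+    "print(demo_order)\n",
+    "# To actually place it against the paper account:\n",
+    "# TradingClient(API_KEY, API_SECRET, paper=True).submit_order(order_data=demo_order)"
+   ]
+  },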
self.stocks_cd,\n", + " tech,\n", + " )).astype(np.float32)\n", + " state[np.isnan(state)] = 0.0\n", + " state[np.isinf(state)] = 0.0\n", + " print(len(self.stockUniverse))\n", + " return state\n", + "\n", + " def submitOrder(self, qty, stock, side, resp):\n", + " if(qty > 0):\n", + " try:\n", + " self.alpaca.submit_order(stock, qty, side, \"market\", \"day\")\n", + " print(\"Market order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | completed.\")\n", + " resp.append(True)\n", + " except:\n", + " print(\"Order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | did not go through.\")\n", + " resp.append(False)\n", + " else:\n", + " print(\"Quantity is 0, order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | not completed.\")\n", + " resp.append(True)\n", + "\n", + " @staticmethod\n", + " def sigmoid_sign(ary, thresh):\n", + " def sigmoid(x):\n", + " return 1 / (1 + np.exp(-x * np.e)) - 0.5\n", + "\n", + " return sigmoid(ary / thresh) * thresh\n", + "\n", + "class StockEnvEmpty(gym.Env):\n", + " #Empty Env used for loading rllib agent\n", + " def __init__(self,config):\n", + " state_dim = config['state_dim']\n", + " action_dim = config['action_dim']\n", + " self.env_num = 1\n", + " self.max_step = 10000\n", + " self.env_name = 'StockEnvEmpty'\n", + " self.state_dim = state_dim\n", + " self.action_dim = action_dim\n", + " self.if_discrete = False\n", + " self.target_return = 9999\n", + " self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(state_dim,), dtype=np.float32)\n", + " self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_dim,), dtype=np.float32)\n", + "\n", + " def reset(self):\n", + " return\n", + "\n", + " def step(self, actions):\n", + " return" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "os4C4-4H7ns7" + }, + "source": [ + "## Run Paper trading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7nw0i-0UN3-7" + }, + "outputs": [], + "source": [ + "print(DOW_30_TICKER)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YsSBK9ION1t6" + }, + "outputs": [], + "source": [ + "state_dim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xYtSv6P1N247" + }, + "outputs": [], + "source": [ + "action_dim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Kl9nulnAJtiI" + }, + "outputs": [], + "source": [ + "paper_trading_erl = AlpacaPaperTrading(ticker_list = DOW_30_TICKER,\n", + " time_interval = '1Min',\n", + " drl_lib = 'elegantrl',\n", + " agent = 'ppo',\n", + " cwd = './papertrading_erl_retrain',\n", + " net_dim = ERL_PARAMS['net_dimension'],\n", + " state_dim = state_dim,\n", + " action_dim= action_dim,\n", + " API_KEY = API_KEY,\n", + " API_SECRET = API_SECRET,\n", + " tech_indicator_list = INDICATORS,\n", + " turbulence_thresh=30,\n", + " max_stock=1e2)\n", + "paper_trading_erl.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "srzBZfYEUI1O" + }, + "source": [ + "# Part 4: Check Portfolio Performance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "chovN1UhTAht" + }, + "outputs": [], + "source": [ + "import alpaca\n", + "import pandas_market_calendars as tc\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pytz\n", + "import yfinance as yf\n", + "import matplotlib.ticker as ticker\n", + "import matplotlib.dates as mdates\n", + "from datetime import datetime as dt\n", + "from finrl.plot 
import backtest_stats\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CaofxMNCfAR1" + }, + "outputs": [], + "source": [ + "def get_trading_days(start, end):\n", + " nyse = tc.get_calendar('NYSE')\n", + " df = nyse.date_range_htf(\"1D\", pd.Timestamp(start), pd.Timestamp(end))\n", + " # df = nyse.sessions_in_range(pd.Timestamp(start),\n", + " # pd.Timestamp(end))\n", + " trading_days = []\n", + " for day in df:\n", + " trading_days.append(str(day)[:10])\n", + "\n", + " return trading_days\n", + "\n", + "def alpaca_history(key, secret, start, end):\n", + " api = TradingClient(\n", + " api_key=API_KEY, secret_key=API_SECRET, paper=True\n", + " )\n", + " trading_days = get_trading_days(start, end)\n", + " df = pd.DataFrame()\n", + " for day in trading_days:\n", + " #df = df.append(api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78])\n", + " df= pd.concat([df,api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78]],ignore_index=True)\n", + "\n", + " equities = df.equity.values\n", + " cumu_returns = equities/equities[0]\n", + " cumu_returns = cumu_returns[~np.isnan(cumu_returns)]\n", + "\n", + " return df, cumu_returns\n", + "\n", + "def DIA_history(start):\n", + " data_df = yf.download(['^DJI'],start=start, interval=\"5m\")\n", + " data_df = data_df.iloc[:]\n", + " baseline_returns = data_df['Adj Close'].values/data_df['Adj Close'].values[0]\n", + " return data_df, baseline_returns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5CHiZRVpURpx" + }, + "source": [ + "## Get cumulative return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O_YT7v-LSdfV" + }, + "outputs": [], + "source": [ + "df_erl, cumu_erl = alpaca_history(key=API_KEY,\n", + " secret=API_SECRET,\n", + " start='2022-09-01', #must be within 1 month\n", + " end='2022-09-12') #change the date if error occurs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IMcQjwHOS6Zb" + }, + "outputs": [], + "source": [ + "df_djia, cumu_djia = DIA_history(start='2022-09-01')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PJXPwmx9Ts5o" + }, + "outputs": [], + "source": [ + "df_erl.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o1Iaw90FTNfU" + }, + "outputs": [], + "source": [ + "returns_erl = cumu_erl -1\n", + "returns_dia = cumu_djia - 1\n", + "returns_dia = returns_dia[:returns_erl.shape[0]]\n", + "print('len of erl return: ', returns_erl.shape[0])\n", + "print('len of dia return: ', returns_dia.shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2IawaMsDwZni" + }, + "outputs": [], + "source": [ + "returns_erl" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5Z0LEm7KUZ5W" + }, + "source": [ + "## plot and save" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Foqk1wIQTQJ3" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.figure(dpi=1000)\n", + "plt.grid()\n", + "plt.grid(which='minor', axis='y')\n", + "plt.title('Stock Trading (Paper trading)', fontsize=20)\n", + "plt.plot(returns_erl, label = 'ElegantRL Agent', color = 'red')\n", + "#plt.plot(returns_sb3, label = 'Stable-Baselines3 Agent', color = 'blue' )\n", + "#plt.plot(returns_rllib, label = 'RLlib Agent', color = 'green')\n", + "plt.plot(returns_dia, 
label = 'DJIA', color = 'grey')\n", + "plt.ylabel('Return', fontsize=16)\n", + "plt.xlabel('Year 2021', fontsize=16)\n", + "plt.xticks(size = 14)\n", + "plt.yticks(size = 14)\n", + "ax = plt.gca()\n", + "ax.xaxis.set_major_locator(ticker.MultipleLocator(78))\n", + "ax.xaxis.set_minor_locator(ticker.MultipleLocator(6))\n", + "ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.005))\n", + "ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=2))\n", + "ax.xaxis.set_major_formatter(ticker.FixedFormatter(['','10-19','','10-20',\n", + " '','10-21','','10-22']))\n", + "plt.legend(fontsize=10.5)\n", + "plt.savefig('papertrading_stock.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O_LsHVj_TZGL" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "paper_trading_erl = AlpacaPaperTrading(ticker_list = DOW_30_TICKER, \n", - " time_interval = '1Min', \n", - " drl_lib = 'elegantrl', \n", - " agent = 'ppo', \n", - " cwd = './papertrading_erl_retrain', \n", - " net_dim = ERL_PARAMS['net_dimension'], \n", - " state_dim = state_dim, \n", - " action_dim= action_dim, \n", - " API_KEY = API_KEY, \n", - " API_SECRET = API_SECRET, \n", - " API_BASE_URL = API_BASE_URL, \n", - " tech_indicator_list = INDICATORS, \n", - " turbulence_thresh=30, \n", - " max_stock=1e2)\n", - "paper_trading_erl.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "srzBZfYEUI1O" - }, - "source": [ - "# Part 4: Check Portfolio Performance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "chovN1UhTAht" - }, - "outputs": [], - "source": [ - "import alpaca_trade_api as tradeapi\n", - "import pandas_market_calendars as tc\n", - "import numpy as np\n", - "import pandas as pd\n", - "import pytz\n", - "import yfinance as yf\n", - "import matplotlib.ticker as ticker\n", - "import matplotlib.dates as mdates\n", - "from datetime import datetime as dt\n", - "from finrl.plot import backtest_stats\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CaofxMNCfAR1" - }, - "outputs": [], - "source": [ - "def get_trading_days(start, end):\n", - " nyse = tc.get_calendar('NYSE')\n", - " df = nyse.date_range_htf(\"1D\", pd.Timestamp(start), pd.Timestamp(end))\n", - " # df = nyse.sessions_in_range(pd.Timestamp(start),\n", - " # pd.Timestamp(end))\n", - " trading_days = []\n", - " for day in df:\n", - " trading_days.append(str(day)[:10])\n", - "\n", - " return trading_days\n", - "\n", - "def alpaca_history(key, secret, url, start, end):\n", - " api = tradeapi.REST(key, secret, url, 'v2')\n", - " trading_days = get_trading_days(start, end)\n", - " df = pd.DataFrame()\n", - " for day in trading_days:\n", - " #df = df.append(api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78])\n", - " df= pd.concat([df,api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78]],ignore_index=True)\n", - " \n", - " equities = df.equity.values\n", - " cumu_returns = equities/equities[0]\n", - " cumu_returns = cumu_returns[~np.isnan(cumu_returns)]\n", - " \n", - " return df, cumu_returns\n", - "\n", - "def DIA_history(start):\n", - " data_df = yf.download(['^DJI'],start=start, interval=\"5m\")\n", - " data_df = data_df.iloc[:]\n", - " baseline_returns = data_df['Adj Close'].values/data_df['Adj Close'].values[0]\n", - " return data_df, baseline_returns" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5CHiZRVpURpx" - }, - 
"source": [ - "## Get cumulative return" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "O_YT7v-LSdfV" - }, - "outputs": [], - "source": [ - "df_erl, cumu_erl = alpaca_history(key=API_KEY, \n", - " secret=API_SECRET, \n", - " url=API_BASE_URL, \n", - " start='2022-09-01', #must be within 1 month\n", - " end='2022-09-12') #change the date if error occurs\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "IMcQjwHOS6Zb", - "outputId": "1fb21460-1da9-4998-f0c0-fcbf5b056e66" - }, - "outputs": [], - "source": [ - "df_djia, cumu_djia = DIA_history(start='2022-09-01')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 238 - }, - "id": "PJXPwmx9Ts5o", - "outputId": "c59014eb-c2f9-4be2-8a87-7892cc0b1094" - }, - "outputs": [], - "source": [ - "df_erl.tail()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { + ], + "metadata": { "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "o1Iaw90FTNfU", - "outputId": "0629dca2-d9dd-4c2a-e363-dc0f01daba41" - }, - "outputs": [], - "source": [ - "returns_erl = cumu_erl -1 \n", - "returns_dia = cumu_djia - 1\n", - "returns_dia = returns_dia[:returns_erl.shape[0]]\n", - "print('len of erl return: ', returns_erl.shape[0])\n", - "print('len of dia return: ', returns_dia.shape[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2IawaMsDwZni" - }, - "outputs": [], - "source": [ - "returns_erl" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5Z0LEm7KUZ5W" - }, - "source": [ - "## plot and save" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Foqk1wIQTQJ3" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "plt.figure(dpi=1000)\n", - "plt.grid()\n", - "plt.grid(which='minor', axis='y')\n", - "plt.title('Stock Trading (Paper trading)', fontsize=20)\n", - "plt.plot(returns_erl, label = 'ElegantRL Agent', color = 'red')\n", - "#plt.plot(returns_sb3, label = 'Stable-Baselines3 Agent', color = 'blue' )\n", - "#plt.plot(returns_rllib, label = 'RLlib Agent', color = 'green')\n", - "plt.plot(returns_dia, label = 'DJIA', color = 'grey')\n", - "plt.ylabel('Return', fontsize=16)\n", - "plt.xlabel('Year 2021', fontsize=16)\n", - "plt.xticks(size = 14)\n", - "plt.yticks(size = 14)\n", - "ax = plt.gca()\n", - "ax.xaxis.set_major_locator(ticker.MultipleLocator(78))\n", - "ax.xaxis.set_minor_locator(ticker.MultipleLocator(6))\n", - "ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.005))\n", - "ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=2))\n", - "ax.xaxis.set_major_formatter(ticker.FixedFormatter(['','10-19','','10-20',\n", - " '','10-21','','10-22']))\n", - "plt.legend(fontsize=10.5)\n", - "plt.savefig('papertrading_stock.png')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "O_LsHVj_TZGL" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "0EVJIQUR6_fu", - "9tzAw9k26nAC", - "zjLda8No6pvI", - "pf5aVHAU-xF6", - "rZMkcyjZ-25l", - "3rwy7V72-8YY", - "J25MuZLiGqCP", - "eW0UDAXI1nEa", - "UFoxkigg1zXa" - ], - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3.8.10 ('venv': venv)", - "language": "python", - "name": "python3" - }, - 
"language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" + "collapsed_sections": [ + "0EVJIQUR6_fu", + "9tzAw9k26nAC", + "zjLda8No6pvI", + "pf5aVHAU-xF6", + "rZMkcyjZ-25l", + "3rwy7V72-8YY", + "J25MuZLiGqCP", + "eW0UDAXI1nEa", + "UFoxkigg1zXa" + ], + "provenance": [], + "include_colab_link": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3.8.10 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "vscode": { + "interpreter": { + "hash": "005d14239094016f48a03a57365c4ccb734e3f38c20ed0ca595d84f773bc39cd" + } + } }, - "vscode": { - "interpreter": { - "hash": "005d14239094016f48a03a57365c4ccb734e3f38c20ed0ca595d84f773bc39cd" - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From d3b406654cee3f09dc5c8cd81800faabd364f027 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 00:29:55 +0000 Subject: [PATCH 17/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/FinRL_PaperTrading_Demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb index 67c15ae05c..a26816c8a9 100644 --- a/examples/FinRL_PaperTrading_Demo.ipynb +++ b/examples/FinRL_PaperTrading_Demo.ipynb @@ -1821,4 +1821,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 151603fccb71e1fa43d9b80b32c26629b61dfb77 Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 11:31:26 -0500 Subject: [PATCH 18/29] Remove API_BASE_URL parameter from AlpacaProcessor --- finrl/meta/data_processors/processor_alpaca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py index 34018ec7fa..03600fc409 100644 --- a/finrl/meta/data_processors/processor_alpaca.py +++ b/finrl/meta/data_processors/processor_alpaca.py @@ -16,7 +16,7 @@ class AlpacaProcessor: - def __init__(self, API_KEY=None, API_SECRET=None, API_BASE_URL=None, client=None): + def __init__(self, API_KEY=None, API_SECRET=None, client=None): if client is None: try: self.client = StockHistoricalDataClient(API_KEY, API_SECRET) From 169b6c993426778a045d5efe642a016fd6f5f158 Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 12:32:37 -0500 Subject: [PATCH 19/29] Fix column reference for renaming in processor_alpaca --- finrl/meta/data_processors/processor_alpaca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py index 03600fc409..511107f705 100644 --- a/finrl/meta/data_processors/processor_alpaca.py +++ b/finrl/meta/data_processors/processor_alpaca.py @@ -427,7 +427,7 @@ def fetch_latest_data( # Set 'timestamp' as the new index if "level_0" in barset.columns: barset.rename(columns={"level_0": "symbol"}, inplace=True) - if "level_1" in bars.columns: 
+ if "level_1" in barset.columns: barset.rename(columns={"level_1": "timestamp"}, inplace=True) barset.set_index("timestamp", inplace=True) From 3f5e153f2c2d7b6112458192e5a549efc708eca6 Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 12:55:42 -0500 Subject: [PATCH 20/29] Correct assignment of barset for data processing --- finrl/meta/data_processors/processor_alpaca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py index 511107f705..2154bf2da3 100644 --- a/finrl/meta/data_processors/processor_alpaca.py +++ b/finrl/meta/data_processors/processor_alpaca.py @@ -432,7 +432,7 @@ def fetch_latest_data( barset.set_index("timestamp", inplace=True) # Reorder and rename columns as needed - barset = bars[ + barset = barset[ [ "close", "high", From 5634761ebc68f2b2b6ff502d716c4a5b326b93a6 Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 13:59:33 -0500 Subject: [PATCH 21/29] Refactor Alpaca API integration in paper trading --- .../env_stock_papertrading.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/finrl/meta/env_stock_trading/env_stock_papertrading.py b/finrl/meta/env_stock_trading/env_stock_papertrading.py index 7c6a4b0bf3..f2551273dd 100644 --- a/finrl/meta/env_stock_trading/env_stock_papertrading.py +++ b/finrl/meta/env_stock_trading/env_stock_papertrading.py @@ -4,7 +4,9 @@ import threading import time -import alpaca as tradeapi +from alpaca.trading.client import TradingClient +from alpaca.trading.requests import GetOrdersRequest +from alpaca.trading.enums import OrderSide, QueryOrderStatus import gymnasium as gym import numpy as np import pandas as pd @@ -12,7 +14,6 @@ from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor - class AlpacaPaperTrading: def __init__( self, @@ -26,7 +27,6 @@ def __init__( action_dim, API_KEY, API_SECRET, - API_BASE_URL, tech_indicator_list, turbulence_thresh=30, max_stock=1e2, @@ -98,7 +98,9 @@ def __init__( # connect to Alpaca trading API try: - self.alpaca = tradeapi.REST(API_KEY, API_SECRET, API_BASE_URL, "v2") + self.alpaca = TradingClient( + api_key=API_KEY, secret_key=API_SECRET, paper=True + ) except: raise ValueError( "Fail to connect Alpaca. Please check account info and internet connection." @@ -153,7 +155,13 @@ def test_latency(self, test_times=10): return latency def run(self): - orders = self.alpaca.list_orders(status="open") + # params to filter orders by + request_params = GetOrdersRequest( + status=QueryOrderStatus.OPEN + ) + + # orders that satisfy params + orders = self.alpaca.get_orders(filter=request_params) for order in orders: self.alpaca.cancel_order(order.id) @@ -180,7 +188,7 @@ def run(self): """# Close all positions when 1 minutes til market close. print("Market closing soon. 
Closing positions.") - positions = self.alpaca.list_positions() + positions = self.alpaca.get_all_positions() for position in positions: if(position.side == 'long'): orderSide = 'sell' @@ -278,7 +286,7 @@ def trade(self): self.stocks_cd[index] = 0 else: # sell all when turbulence - positions = self.alpaca.list_positions() + positions = self.alpaca.get_all_positions() for position in positions: if position.side == "long": orderSide = "sell" @@ -295,7 +303,7 @@ def trade(self): self.stocks_cd[:] = 0 def get_state(self): - alpaca = AlpacaProcessor(api=self.alpaca) + alpaca = AlpacaProcessor(API_KEY=API_KEY, API_SECRET=API_SECRET) price, tech, turbulence = alpaca.fetch_latest_data( ticker_list=self.stockUniverse, time_interval="1Min", @@ -308,7 +316,7 @@ def get_state(self): ).astype(np.float32) tech = tech * 2**-7 - positions = self.alpaca.list_positions() + positions = self.alpaca.get_all_positions() stocks = [0] * len(self.stockUniverse) for position in positions: ind = self.stockUniverse.index(position.symbol) From ff6eeca33c19b3b343d53b2861758eb8f2daa582 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 18:59:44 +0000 Subject: [PATCH 22/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../meta/env_stock_trading/env_stock_papertrading.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/finrl/meta/env_stock_trading/env_stock_papertrading.py b/finrl/meta/env_stock_trading/env_stock_papertrading.py index f2551273dd..b8e49a1290 100644 --- a/finrl/meta/env_stock_trading/env_stock_papertrading.py +++ b/finrl/meta/env_stock_trading/env_stock_papertrading.py @@ -4,16 +4,18 @@ import threading import time -from alpaca.trading.client import TradingClient -from alpaca.trading.requests import GetOrdersRequest -from alpaca.trading.enums import OrderSide, QueryOrderStatus import gymnasium as gym import numpy as np import pandas as pd import torch +from alpaca.trading.client import TradingClient +from alpaca.trading.enums import OrderSide +from alpaca.trading.enums import QueryOrderStatus +from alpaca.trading.requests import GetOrdersRequest from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor + class AlpacaPaperTrading: def __init__( self, @@ -156,9 +158,7 @@ def test_latency(self, test_times=10): def run(self): # params to filter orders by - request_params = GetOrdersRequest( - status=QueryOrderStatus.OPEN - ) + request_params = GetOrdersRequest(status=QueryOrderStatus.OPEN) # orders that satisfy params orders = self.alpaca.get_orders(filter=request_params) From 5d07950a589f24131df4e312e38adcf3d99c2fb2 Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 14:01:22 -0500 Subject: [PATCH 23/29] Skip processing for single column DataFrame --- finrl/meta/data_processors/processor_alpaca.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py index 2154bf2da3..664ba2a07c 100644 --- a/finrl/meta/data_processors/processor_alpaca.py +++ b/finrl/meta/data_processors/processor_alpaca.py @@ -424,6 +424,10 @@ def fetch_latest_data( # Now reset the index barset.reset_index(inplace=True) + # If one column is returned, do not process + if(len(barset.columns) <= 1): + continue + # Set 'timestamp' as the new index if "level_0" in barset.columns: barset.rename(columns={"level_0": "symbol"}, inplace=True) From 
05951e5cfc2d51d8ac26faa4c6d7bd3c5d53a004 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 19:01:34 +0000 Subject: [PATCH 24/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finrl/meta/data_processors/processor_alpaca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py index 664ba2a07c..7734e9def0 100644 --- a/finrl/meta/data_processors/processor_alpaca.py +++ b/finrl/meta/data_processors/processor_alpaca.py @@ -425,7 +425,7 @@ def fetch_latest_data( barset.reset_index(inplace=True) # If one column is returned, do not process - if(len(barset.columns) <= 1): + if len(barset.columns) <= 1: continue # Set 'timestamp' as the new index From 934bd661516df1c252aefe27fe810d2263a7a8ec Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 14:38:07 -0500 Subject: [PATCH 25/29] Refactor Alpaca API integration for portfolio history --- finrl/meta/paper_trading/common.py | 31 +++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/finrl/meta/paper_trading/common.py b/finrl/meta/paper_trading/common.py index b4d0ade751..14a48210d9 100644 --- a/finrl/meta/paper_trading/common.py +++ b/finrl/meta/paper_trading/common.py @@ -798,13 +798,14 @@ def test( # ----------------------------------------------------------------------------------------------------------------------------------------- -import alpaca as tradeapi +from alpaca.trading.client import TradingClient +from alpaca.trading.models import TradeAccount, PortfolioHistory +from alpaca.trading.requests import GetOrdersRequest, GetPortfolioHistoryRequest import pandas_market_calendars as tc import numpy as np import pandas as pd import yfinance as yf - def get_trading_days(start, end): nyse = tc.get_calendar("NYSE") df = nyse.date_range_htf("1D", pd.Timestamp(start), pd.Timestamp(end)) @@ -817,16 +818,28 @@ def get_trading_days(start, end): return trading_days -def alpaca_history(key, secret, url, start, end): - api = tradeapi.REST(key, secret, url, "v2") +def alpaca_history(key, secret, start, end): + trading_client = TradingClient(key, secret, paper=True) + trade_account = trading_client.get_account() trading_days = get_trading_days(start, end) df = pd.DataFrame() for day in trading_days: - df = df.append( - api.get_portfolio_history(date_start=day, timeframe="5Min").df.iloc[:78] - ) + portfoil_history_request = GetPortfolioHistoryRequest(start=day, timeframe='1D') + portfoil_history = trading_client.get_portfolio_history(history_filter = portfoil_history_request) + # Create a DataFrame from the relevant parts of the JSON + df = pd.DataFrame({ + 'timestamp': portfoil_history.timestamp, + 'equity': portfoil_history.equity, + 'profit_loss': portfoil_history.profit_loss, + 'profit_loss_pct': portfoil_history.profit_loss_pct + }) + + # Convert the 'timestamp' column to a readable datetime format + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + # df= pd.concat([df, portfoil_history.df.iloc[:78]],ignore_index=True) + equities = df.equity.values - cumu_returns = equities / equities[0] + cumu_returns = equities/equities[0] cumu_returns = cumu_returns[~np.isnan(cumu_returns)] return df, cumu_returns @@ -835,7 +848,7 @@ def alpaca_history(key, secret, url, start, end): def DIA_history(start): data_df = yf.download(["^DJI"], start=start, interval="5m") 
data_df = data_df.iloc[:]
- baseline_returns = data_df["Adj Close"].values / data_df["Adj Close"].values[0]
+ baseline_returns = data_df["Close"].values / data_df["Close"].values[0]
 return data_df, baseline_returns


From 2dc4c3d319a8be91b907e4db4ee732f286ea7853 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Sep 2025 19:38:42 +0000
Subject: [PATCH 26/29] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 finrl/meta/paper_trading/common.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/finrl/meta/paper_trading/common.py b/finrl/meta/paper_trading/common.py
index 14a48210d9..cfc247a6cb 100644
--- a/finrl/meta/paper_trading/common.py
+++ b/finrl/meta/paper_trading/common.py
@@ -806,6 +806,7 @@ def test(
 import pandas as pd
 import yfinance as yf

+
 def get_trading_days(start, end):
 nyse = tc.get_calendar("NYSE")
 df = nyse.date_range_htf("1D", pd.Timestamp(start), pd.Timestamp(end))
@@ -824,22 +825,26 @@ def alpaca_history(key, secret, start, end):
 trading_days = get_trading_days(start, end)
 df = pd.DataFrame()
 for day in trading_days:
- portfoil_history_request = GetPortfolioHistoryRequest(start=day, timeframe='1D')
- portfoil_history = trading_client.get_portfolio_history(history_filter = portfoil_history_request)
+ portfoil_history_request = GetPortfolioHistoryRequest(start=day, timeframe="1D")
+ portfoil_history = trading_client.get_portfolio_history(
+ history_filter=portfoil_history_request
+ )
 # Create a DataFrame from the relevant parts of the JSON
- df = pd.DataFrame({
- 'timestamp': portfoil_history.timestamp,
- 'equity': portfoil_history.equity,
- 'profit_loss': portfoil_history.profit_loss,
- 'profit_loss_pct': portfoil_history.profit_loss_pct
- })
+ df = pd.DataFrame(
+ {
+ "timestamp": portfoil_history.timestamp,
+ "equity": portfoil_history.equity,
+ "profit_loss": portfoil_history.profit_loss,
+ "profit_loss_pct": portfoil_history.profit_loss_pct,
+ }
+ )

 # Convert the 'timestamp' column to a readable datetime format
- df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
+ df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
 # df= pd.concat([df, portfoil_history.df.iloc[:78]],ignore_index=True)

 equities = df.equity.values
- cumu_returns = equities/equities[0]
+ cumu_returns = equities / equities[0]
 cumu_returns = cumu_returns[~np.isnan(cumu_returns)]

 return df, cumu_returns

From 2854b10b7f04707e1dffb0396844322d5882675d Mon Sep 17 00:00:00 2001
From: kuds
Date: Thu, 11 Sep 2025 14:57:57 -0500
Subject: [PATCH 27/29] Fix evaluation methods at the end to use the new Alpaca-py SDK

---
 examples/FinRL_PaperTrading_Demo.ipynb | 53 ++++++++++++++------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb
index a26816c8a9..1e3b1df38d 100644
--- a/examples/FinRL_PaperTrading_Demo.ipynb
+++ b/examples/FinRL_PaperTrading_Demo.ipynb
@@ -19,15 +19,6 @@
 "Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing."
]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yhzqm7zYB1Xg"
- },
- "source": [
- "\"Open"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -1324,7 +1315,7 @@
 " print(\"Market closing soon. Closing positions.\")\n",
 "\n",
 " threads = []\n",
- " positions = self.alpaca.list_positions()\n",
+ " positions = self.alpaca.get_all_positions()\n",
 " for position in positions:\n",
 " if(position.side == 'long'):\n",
 " orderSide = 'sell'\n",
@@ -1420,7 +1411,7 @@
 "\n",
 " else: # sell all when turbulence\n",
 " threads = []\n",
- " positions = self.alpaca.list_positions()\n",
+ " positions = self.alpaca.get_all_positions()\n",
 " for position in positions:\n",
 " if(position.side == 'long'):\n",
 " orderSide = 'sell'\n",
@@ -1439,7 +1430,7 @@
 "\n",
 "\n",
 " def get_state(self):\n",
- " alpaca = AlpacaProcessor(api=self.alpaca)\n",
+ " alpaca = AlpacaProcessor(API_KEY=API_KEY, API_SECRET=API_SECRET)\n",
 " price, tech, turbulence = alpaca.fetch_latest_data(ticker_list = self.stockUniverse, time_interval='1Min',\n",
 " tech_indicator_list=self.tech_indicator_list)\n",
 " turbulence_bool = 1 if turbulence >= self.turbulence_thresh else 0\n",
@@ -1447,7 +1438,7 @@
 " turbulence = (self.sigmoid_sign(turbulence, self.turbulence_thresh) * 2 ** -5).astype(np.float32)\n",
 "\n",
 " tech = tech * 2 ** -7\n",
- " positions = self.alpaca.list_positions()\n",
+ " positions = self.alpaca.get_all_positions()\n",
 " stocks = [0] * len(self.stockUniverse)\n",
 " for position in positions:\n",
 " ind = self.stockUniverse.index(position.symbol)\n",
@@ -1576,7 +1567,7 @@
 " cwd = './papertrading_erl_retrain',\n",
 " net_dim = ERL_PARAMS['net_dimension'],\n",
 " state_dim = state_dim,\n",
- " action_dim= action_dim,\n",
+ " action_dim = action_dim,\n",
 " API_KEY = API_KEY,\n",
 " API_SECRET = API_SECRET,\n",
 " tech_indicator_list = INDICATORS,\n",
@@ -1602,7 +1593,9 @@
 },
 "outputs": [],
 "source": [
- "import alpaca\n",
+ "from alpaca.trading.client import TradingClient\n",
+ "from alpaca.trading.models import TradeAccount, PortfolioHistory\n",
+ "from alpaca.trading.requests import GetOrdersRequest, GetPortfolioHistoryRequest\n",
 "import pandas_market_calendars as tc\n",
 "import numpy as np\n",
 "import pandas as pd\n",
@@ -1635,14 +1628,24 @@
 " return trading_days\n",
 "\n",
 "def alpaca_history(key, secret, start, end):\n",
- " api = TradingClient(\n",
- " api_key=API_KEY, secret_key=API_SECRET, paper=True\n",
- " )\n",
+ " trading_client = TradingClient(key, secret, paper=True)\n",
+ " trade_account = trading_client.get_account()\n",
 " trading_days = get_trading_days(start, end)\n",
 " df = pd.DataFrame()\n",
 " for day in trading_days:\n",
- " #df = df.append(api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78])\n",
- " df= pd.concat([df,api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78]],ignore_index=True)\n",
+ " portfoil_history_request = GetPortfolioHistoryRequest(start=day, timeframe='1D')\n",
+ " portfoil_history = trading_client.get_portfolio_history(history_filter = portfoil_history_request)\n",
+ " # Create a DataFrame from the relevant parts of the JSON\n",
+ " df = pd.DataFrame({\n",
+ " 'timestamp': portfoil_history.timestamp,\n",
+ " 'equity': portfoil_history.equity,\n",
+ " 'profit_loss': portfoil_history.profit_loss,\n",
+ " 'profit_loss_pct': portfoil_history.profit_loss_pct\n",
+ " })\n",
+ "\n",
+ " # Convert the 'timestamp' column to a readable datetime format\n",
+ " df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')\n",
+ " # df= pd.concat([df, 
portfoil_history.df.iloc[:78]],ignore_index=True)\n", "\n", " equities = df.equity.values\n", " cumu_returns = equities/equities[0]\n", @@ -1653,7 +1656,7 @@ "def DIA_history(start):\n", " data_df = yf.download(['^DJI'],start=start, interval=\"5m\")\n", " data_df = data_df.iloc[:]\n", - " baseline_returns = data_df['Adj Close'].values/data_df['Adj Close'].values[0]\n", + " baseline_returns = data_df['Close'].values/data_df['Close'].values[0]\n", " return data_df, baseline_returns" ] }, @@ -1676,8 +1679,8 @@ "source": [ "df_erl, cumu_erl = alpaca_history(key=API_KEY,\n", " secret=API_SECRET,\n", - " start='2022-09-01', #must be within 1 month\n", - " end='2022-09-12') #change the date if error occurs\n" + " start='2025-09-01', #must be within 1 month\n", + " end='2025-09-10') #change the date if error occurs\n" ] }, { @@ -1688,7 +1691,7 @@ }, "outputs": [], "source": [ - "df_djia, cumu_djia = DIA_history(start='2022-09-01')" + "df_djia, cumu_djia = DIA_history(start='2025-09-01')" ] }, { @@ -1821,4 +1824,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file From 98ed7bf50211688dd2052b36e31c16dcf4c540d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 19:58:09 +0000 Subject: [PATCH 28/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/FinRL_PaperTrading_Demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb index 1e3b1df38d..bca4f1267a 100644 --- a/examples/FinRL_PaperTrading_Demo.ipynb +++ b/examples/FinRL_PaperTrading_Demo.ipynb @@ -1824,4 +1824,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From cbbcc79f6ad58245b770682a2e8dd16a9e7ec8a1 Mon Sep 17 00:00:00 2001 From: kuds Date: Thu, 11 Sep 2025 14:59:09 -0500 Subject: [PATCH 29/29] Update Colab link in PaperTrading demo notebook --- examples/FinRL_PaperTrading_Demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb index bca4f1267a..daebf346a1 100644 --- a/examples/FinRL_PaperTrading_Demo.ipynb +++ b/examples/FinRL_PaperTrading_Demo.ipynb @@ -7,7 +7,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, {
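
---

Note on the migrated surface: the alpaca-py calls these patches adopt, gathered
into one minimal, self-contained sketch. This is an illustration under stated
assumptions, not code lifted from any single patch: it assumes alpaca-py is
installed, that API_KEY and API_SECRET hold paper-trading credentials, and it
uses cancel_order_by_id as the alpaca-py counterpart of the legacy SDK's
cancel_order (run() above still calls cancel_order on the new client, so treat
that substitution as this sketch's assumption).

    import pandas as pd
    from alpaca.trading.client import TradingClient
    from alpaca.trading.enums import QueryOrderStatus
    from alpaca.trading.requests import GetOrdersRequest, GetPortfolioHistoryRequest

    # Paper-trading client, as in patch 21 (keys are placeholders).
    client = TradingClient(api_key=API_KEY, secret_key=API_SECRET, paper=True)

    # Open orders: a GetOrdersRequest filter replaces list_orders(status="open").
    open_orders = client.get_orders(filter=GetOrdersRequest(status=QueryOrderStatus.OPEN))
    for order in open_orders:
        client.cancel_order_by_id(order.id)  # assumed replacement for cancel_order(order.id)

    # Positions and account: get_all_positions() replaces list_positions().
    for position in client.get_all_positions():
        print(position.symbol, position.side, position.qty)
    last_equity = float(client.get_account().last_equity)

    # Portfolio history: a request object replaces get_portfolio_history(date_start=...).
    history = client.get_portfolio_history(
        history_filter=GetPortfolioHistoryRequest(start="2025-09-01", timeframe="1D")
    )
    equity_df = pd.DataFrame(
        {
            "timestamp": pd.to_datetime(history.timestamp, unit="s"),
            "equity": history.equity,
        }
    )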
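A second sketch, on the history loop: alpaca_history above rebinds df on every
trading day and leaves the old pd.concat call commented out, so only the last
day's frame survives the loop. If multi-day history is the intent, a hedged
variant (same assumptions as the sketch above) would accumulate one frame per
day instead:

    df = pd.DataFrame()
    for day in get_trading_days("2025-09-01", "2025-09-10"):
        history = client.get_portfolio_history(
            history_filter=GetPortfolioHistoryRequest(start=day, timeframe="1D")
        )
        # Build one frame per trading day, then append it to the running total.
        day_df = pd.DataFrame(
            {
                "timestamp": pd.to_datetime(history.timestamp, unit="s"),
                "equity": history.equity,
                "profit_loss": history.profit_loss,
                "profit_loss_pct": history.profit_loss_pct,
            }
        )
        df = pd.concat([df, day_df], ignore_index=True)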