diff --git a/examples/FinRL_PaperTrading_Demo.ipynb b/examples/FinRL_PaperTrading_Demo.ipynb
index f54a01b7f3..daebf346a1 100644
--- a/examples/FinRL_PaperTrading_Demo.ipynb
+++ b/examples/FinRL_PaperTrading_Demo.ipynb
@@ -1,1892 +1,1827 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "V1ofncK2cYhs"
- },
- "source": [
- "Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "j3mbRu3s1YlD"
- },
- "source": [
- "# Part 1: Install FinRL"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "0gkmsPgbvNf6"
- },
- "outputs": [],
- "source": [
- "## install finrl library\n",
- "!pip install wrds\n",
- "!pip install swig\n",
- "!pip install -q condacolab\n",
- "import condacolab\n",
- "condacolab.install()\n",
- "!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig\n",
- "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "3rwy7V72-8YY"
- },
- "source": [
- "## Get the API Keys Ready"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "8Z6qlLXY-fA2"
- },
- "outputs": [],
- "source": [
- "API_KEY = \"\"\n",
- "API_SECRET = \"\"\n",
- "API_BASE_URL = 'https://paper-api.alpaca.markets'\n",
- "data_url = 'wss://data.alpaca.markets'"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "--6Kx8I21erH"
- },
- "source": [
- "## Import related modules"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "H7I7zsyYfoLJ",
- "outputId": "1812c13b-410f-434c-eb07-29782ba186e6"
- },
- "outputs": [],
- "source": [
- "from finrl.config_tickers import DOW_30_TICKER\n",
- "from finrl.config import INDICATORS\n",
- "from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv\n",
- "\n",
- "import numpy as np\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0EVJIQUR6_fu"
- },
- "source": [
- "## PPO"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "-EYx40S84tzo"
- },
- "outputs": [],
- "source": [
- "import os\n",
- "import time\n",
- "import gym\n",
- "import numpy as np\n",
- "import numpy.random as rd\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "from torch import Tensor\n",
- "from torch.distributions.normal import Normal\n",
- "\n",
- "\n",
- "class ActorPPO(nn.Module):\n",
- " def __init__(self, dims: [int], state_dim: int, action_dim: int):\n",
- " super().__init__()\n",
- " self.net = build_mlp(dims=[state_dim, *dims, action_dim])\n",
- " self.action_std_log = nn.Parameter(torch.zeros((1, action_dim)), requires_grad=True) # trainable parameter\n",
- "\n",
- " def forward(self, state: Tensor) -> Tensor:\n",
- " return self.net(state).tanh() # action.tanh()\n",
- "\n",
- " def get_action(self, state: Tensor) -> (Tensor, Tensor): # for exploration\n",
- " action_avg = self.net(state)\n",
- " action_std = self.action_std_log.exp()\n",
- "\n",
- " dist = Normal(action_avg, action_std)\n",
- " action = dist.sample()\n",
- " logprob = dist.log_prob(action).sum(1)\n",
- " return action, logprob\n",
- "\n",
- " def get_logprob_entropy(self, state: Tensor, action: Tensor) -> (Tensor, Tensor):\n",
- " action_avg = self.net(state)\n",
- " action_std = self.action_std_log.exp()\n",
- "\n",
- " dist = Normal(action_avg, action_std)\n",
- " logprob = dist.log_prob(action).sum(1)\n",
- " entropy = dist.entropy().sum(1)\n",
- " return logprob, entropy\n",
- "\n",
- " @staticmethod\n",
- " def convert_action_for_env(action: Tensor) -> Tensor:\n",
- " return action.tanh()\n",
- "\n",
- "\n",
- "class CriticPPO(nn.Module):\n",
- " def __init__(self, dims: [int], state_dim: int, _action_dim: int):\n",
- " super().__init__()\n",
- " self.net = build_mlp(dims=[state_dim, *dims, 1])\n",
- "\n",
- " def forward(self, state: Tensor) -> Tensor:\n",
- " return self.net(state) # advantage value\n",
- "\n",
- "\n",
- "def build_mlp(dims: [int]) -> nn.Sequential: # MLP (MultiLayer Perceptron)\n",
- " net_list = []\n",
- " for i in range(len(dims) - 1):\n",
- " net_list.extend([nn.Linear(dims[i], dims[i + 1]), nn.ReLU()])\n",
- " del net_list[-1] # remove the activation of output layer\n",
- " return nn.Sequential(*net_list)\n",
- "\n",
- "\n",
- "class Config:\n",
- " def __init__(self, agent_class=None, env_class=None, env_args=None):\n",
- " self.env_class = env_class # env = env_class(**env_args)\n",
- " self.env_args = env_args # env = env_class(**env_args)\n",
- "\n",
- " if env_args is None: # dummy env_args\n",
- " env_args = {'env_name': None, 'state_dim': None, 'action_dim': None, 'if_discrete': None}\n",
- " self.env_name = env_args['env_name'] # the name of environment. Be used to set 'cwd'.\n",
- " self.state_dim = env_args['state_dim'] # vector dimension (feature number) of state\n",
- " self.action_dim = env_args['action_dim'] # vector dimension (feature number) of action\n",
- " self.if_discrete = env_args['if_discrete'] # discrete or continuous action space\n",
- "\n",
- " self.agent_class = agent_class # agent = agent_class(...)\n",
- "\n",
- " '''Arguments for reward shaping'''\n",
- " self.gamma = 0.99 # discount factor of future rewards\n",
- " self.reward_scale = 1.0 # an approximate target reward usually be closed to 256\n",
- "\n",
- " '''Arguments for training'''\n",
- " self.gpu_id = int(0) # `int` means the ID of single GPU, -1 means CPU\n",
- " self.net_dims = (64, 32) # the middle layer dimension of MLP (MultiLayer Perceptron)\n",
- " self.learning_rate = 6e-5 # 2 ** -14 ~= 6e-5\n",
- " self.soft_update_tau = 5e-3 # 2 ** -8 ~= 5e-3\n",
- " self.batch_size = int(128) # num of transitions sampled from replay buffer.\n",
- " self.horizon_len = int(2000) # collect horizon_len step while exploring, then update network\n",
- " self.buffer_size = None # ReplayBuffer size. Empty the ReplayBuffer for on-policy.\n",
- " self.repeat_times = 8.0 # repeatedly update network using ReplayBuffer to keep critic's loss small\n",
- "\n",
- " '''Arguments for evaluate'''\n",
- " self.cwd = None # current working directory to save model. None means set automatically\n",
- " self.break_step = +np.inf # break training if 'total_step > break_step'\n",
- " self.eval_times = int(32) # number of times that get episodic cumulative return\n",
- " self.eval_per_step = int(2e4) # evaluate the agent per training steps\n",
- "\n",
- " def init_before_training(self):\n",
- " if self.cwd is None: # set cwd (current working directory) for saving model\n",
- " self.cwd = f'./{self.env_name}_{self.agent_class.__name__[5:]}'\n",
- " os.makedirs(self.cwd, exist_ok=True)\n",
- "\n",
- "\n",
- "def get_gym_env_args(env, if_print: bool) -> dict:\n",
- " if {'unwrapped', 'observation_space', 'action_space', 'spec'}.issubset(dir(env)): # isinstance(env, gym.Env):\n",
- " env_name = env.unwrapped.spec.id\n",
- " state_shape = env.observation_space.shape\n",
- " state_dim = state_shape[0] if len(state_shape) == 1 else state_shape # sometimes state_dim is a list\n",
- "\n",
- " if_discrete = isinstance(env.action_space, gym.spaces.Discrete)\n",
- " if if_discrete: # make sure it is discrete action space\n",
- " action_dim = env.action_space.n\n",
- " elif isinstance(env.action_space, gym.spaces.Box): # make sure it is continuous action space\n",
- " action_dim = env.action_space.shape[0]\n",
- "\n",
- " env_args = {'env_name': env_name, 'state_dim': state_dim, 'action_dim': action_dim, 'if_discrete': if_discrete}\n",
- " print(f\"env_args = {repr(env_args)}\") if if_print else None\n",
- " return env_args\n",
- "\n",
- "\n",
- "def kwargs_filter(function, kwargs: dict) -> dict:\n",
- " import inspect\n",
- " sign = inspect.signature(function).parameters.values()\n",
- " sign = {val.name for val in sign}\n",
- " common_args = sign.intersection(kwargs.keys())\n",
- " return {key: kwargs[key] for key in common_args} # filtered kwargs\n",
- "\n",
- "\n",
- "def build_env(env_class=None, env_args=None):\n",
- " if env_class.__module__ == 'gym.envs.registration': # special rule\n",
- " env = env_class(id=env_args['env_name'])\n",
- " else:\n",
- " env = env_class(**kwargs_filter(env_class.__init__, env_args.copy()))\n",
- " for attr_str in ('env_name', 'state_dim', 'action_dim', 'if_discrete'):\n",
- " setattr(env, attr_str, env_args[attr_str])\n",
- " return env\n",
- "\n",
- "\n",
- "class AgentBase:\n",
- " def __init__(self, net_dims: [int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n",
- " self.state_dim = state_dim\n",
- " self.action_dim = action_dim\n",
- "\n",
- " self.gamma = args.gamma\n",
- " self.batch_size = args.batch_size\n",
- " self.repeat_times = args.repeat_times\n",
- " self.reward_scale = args.reward_scale\n",
- " self.soft_update_tau = args.soft_update_tau\n",
- "\n",
- " self.states = None # assert self.states == (1, state_dim)\n",
- " self.device = torch.device(f\"cuda:{gpu_id}\" if (torch.cuda.is_available() and (gpu_id >= 0)) else \"cpu\")\n",
- "\n",
- " act_class = getattr(self, \"act_class\", None)\n",
- " cri_class = getattr(self, \"cri_class\", None)\n",
- " self.act = self.act_target = act_class(net_dims, state_dim, action_dim).to(self.device)\n",
- " self.cri = self.cri_target = cri_class(net_dims, state_dim, action_dim).to(self.device) \\\n",
- " if cri_class else self.act\n",
- "\n",
- " self.act_optimizer = torch.optim.Adam(self.act.parameters(), args.learning_rate)\n",
- " self.cri_optimizer = torch.optim.Adam(self.cri.parameters(), args.learning_rate) \\\n",
- " if cri_class else self.act_optimizer\n",
- "\n",
- " self.criterion = torch.nn.SmoothL1Loss()\n",
- "\n",
- " @staticmethod\n",
- " def optimizer_update(optimizer, objective: Tensor):\n",
- " optimizer.zero_grad()\n",
- " objective.backward()\n",
- " optimizer.step()\n",
- "\n",
- " @staticmethod\n",
- " def soft_update(target_net: torch.nn.Module, current_net: torch.nn.Module, tau: float):\n",
- " for tar, cur in zip(target_net.parameters(), current_net.parameters()):\n",
- " tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))\n",
- "\n",
- "\n",
- "class AgentPPO(AgentBase):\n",
- " def __init__(self, net_dims: [int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n",
- " self.if_off_policy = False\n",
- " self.act_class = getattr(self, \"act_class\", ActorPPO)\n",
- " self.cri_class = getattr(self, \"cri_class\", CriticPPO)\n",
- " AgentBase.__init__(self, net_dims, state_dim, action_dim, gpu_id, args)\n",
- "\n",
- " self.ratio_clip = getattr(args, \"ratio_clip\", 0.25) # `ratio.clamp(1 - clip, 1 + clip)`\n",
- " self.lambda_gae_adv = getattr(args, \"lambda_gae_adv\", 0.95) # could be 0.80~0.99\n",
- " self.lambda_entropy = getattr(args, \"lambda_entropy\", 0.01) # could be 0.00~0.10\n",
- " self.lambda_entropy = torch.tensor(self.lambda_entropy, dtype=torch.float32, device=self.device)\n",
- "\n",
- " def explore_env(self, env, horizon_len: int) -> [Tensor]:\n",
- " states = torch.zeros((horizon_len, self.state_dim), dtype=torch.float32).to(self.device)\n",
- " actions = torch.zeros((horizon_len, self.action_dim), dtype=torch.float32).to(self.device)\n",
- " logprobs = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n",
- " rewards = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n",
- " dones = torch.zeros(horizon_len, dtype=torch.bool).to(self.device)\n",
- "\n",
- " ary_state = self.states[0]\n",
- "\n",
- " get_action = self.act.get_action\n",
- " convert = self.act.convert_action_for_env\n",
- " for i in range(horizon_len):\n",
- " state = torch.as_tensor(ary_state, dtype=torch.float32, device=self.device)\n",
- " action, logprob = [t.squeeze(0) for t in get_action(state.unsqueeze(0))[:2]]\n",
- "\n",
- " ary_action = convert(action).detach().cpu().numpy()\n",
- " ary_state, reward, done, _, _ = env.step(ary_action)\n",
- " if done:\n",
- " ary_state, _ = env.reset()\n",
- "\n",
- " states[i] = state\n",
- " actions[i] = action\n",
- " logprobs[i] = logprob\n",
- " rewards[i] = reward\n",
- " dones[i] = done\n",
- "\n",
- " self.states[0] = ary_state\n",
- " rewards = (rewards * self.reward_scale).unsqueeze(1)\n",
- " undones = (1 - dones.type(torch.float32)).unsqueeze(1)\n",
- " return states, actions, logprobs, rewards, undones\n",
- "\n",
- " def update_net(self, buffer) -> [float]:\n",
- " with torch.no_grad():\n",
- " states, actions, logprobs, rewards, undones = buffer\n",
- " buffer_size = states.shape[0]\n",
- "\n",
- " '''get advantages reward_sums'''\n",
- " bs = 2 ** 10 # set a smaller 'batch_size' when out of GPU memory.\n",
- " values = [self.cri(states[i:i + bs]) for i in range(0, buffer_size, bs)]\n",
- " values = torch.cat(values, dim=0).squeeze(1) # values.shape == (buffer_size, )\n",
- "\n",
- " advantages = self.get_advantages(rewards, undones, values) # advantages.shape == (buffer_size, )\n",
- " reward_sums = advantages + values # reward_sums.shape == (buffer_size, )\n",
- " del rewards, undones, values\n",
- "\n",
- " advantages = (advantages - advantages.mean()) / (advantages.std(dim=0) + 1e-5)\n",
- " assert logprobs.shape == advantages.shape == reward_sums.shape == (buffer_size,)\n",
- "\n",
- " '''update network'''\n",
- " obj_critics = 0.0\n",
- " obj_actors = 0.0\n",
- "\n",
- " update_times = int(buffer_size * self.repeat_times / self.batch_size)\n",
- " assert update_times >= 1\n",
- " for _ in range(update_times):\n",
- " indices = torch.randint(buffer_size, size=(self.batch_size,), requires_grad=False)\n",
- " state = states[indices]\n",
- " action = actions[indices]\n",
- " logprob = logprobs[indices]\n",
- " advantage = advantages[indices]\n",
- " reward_sum = reward_sums[indices]\n",
- "\n",
- " value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state\n",
- " obj_critic = self.criterion(value, reward_sum)\n",
- " self.optimizer_update(self.cri_optimizer, obj_critic)\n",
- "\n",
- " new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)\n",
- " ratio = (new_logprob - logprob.detach()).exp()\n",
- " surrogate1 = advantage * ratio\n",
- " surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)\n",
- " obj_surrogate = torch.min(surrogate1, surrogate2).mean()\n",
- "\n",
- " obj_actor = obj_surrogate + obj_entropy.mean() * self.lambda_entropy\n",
- " self.optimizer_update(self.act_optimizer, -obj_actor)\n",
- "\n",
- " obj_critics += obj_critic.item()\n",
- " obj_actors += obj_actor.item()\n",
- " a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1)).mean()\n",
- " return obj_critics / update_times, obj_actors / update_times, a_std_log.item()\n",
- "\n",
- " def get_advantages(self, rewards: Tensor, undones: Tensor, values: Tensor) -> Tensor:\n",
- " advantages = torch.empty_like(values) # advantage value\n",
- "\n",
- " masks = undones * self.gamma\n",
- " horizon_len = rewards.shape[0]\n",
- "\n",
- " next_state = torch.tensor(self.states, dtype=torch.float32).to(self.device)\n",
- " next_value = self.cri(next_state).detach()[0, 0]\n",
- "\n",
- " advantage = 0 # last_gae_lambda\n",
- " for t in range(horizon_len - 1, -1, -1):\n",
- " delta = rewards[t] + masks[t] * next_value - values[t]\n",
- " advantages[t] = advantage = delta + masks[t] * self.lambda_gae_adv * advantage\n",
- " next_value = values[t]\n",
- " return advantages\n",
- "\n",
- "\n",
- "class PendulumEnv(gym.Wrapper): # a demo of custom gym env\n",
- " def __init__(self):\n",
- " gym.logger.set_level(40) # Block warning\n",
- " gym_env_name = \"Pendulum-v0\" if gym.__version__ < '0.18.0' else \"Pendulum-v1\"\n",
- " super().__init__(env=gym.make(gym_env_name))\n",
- "\n",
- " '''the necessary env information when you design a custom env'''\n",
- " self.env_name = gym_env_name # the name of this env.\n",
- " self.state_dim = self.observation_space.shape[0] # feature number of state\n",
- " self.action_dim = self.action_space.shape[0] # feature number of action\n",
- " self.if_discrete = False # discrete action or continuous action\n",
- "\n",
- " def reset(self) -> np.ndarray: # reset the agent in env\n",
- " resetted_env, _ = self.env.reset()\n",
- " return resetted_env\n",
- "\n",
- " def step(self, action: np.ndarray) -> (np.ndarray, float, bool, dict): # agent interacts in env\n",
- " # We suggest that adjust action space to (-1, +1) when designing a custom env.\n",
- " state, reward, done, info_dict, _ = self.env.step(action * 2)\n",
- " return state.reshape(self.state_dim), float(reward), done, info_dict\n",
- "\n",
- " \n",
- "def train_agent(args: Config):\n",
- " args.init_before_training()\n",
- "\n",
- " env = build_env(args.env_class, args.env_args)\n",
- " agent = args.agent_class(args.net_dims, args.state_dim, args.action_dim, gpu_id=args.gpu_id, args=args)\n",
- "\n",
- " new_env, _ = env.reset()\n",
- " agent.states = new_env[np.newaxis, :]\n",
- "\n",
- " evaluator = Evaluator(eval_env=build_env(args.env_class, args.env_args),\n",
- " eval_per_step=args.eval_per_step,\n",
- " eval_times=args.eval_times,\n",
- " cwd=args.cwd)\n",
- " torch.set_grad_enabled(False)\n",
- " while True: # start training\n",
- " buffer_items = agent.explore_env(env, args.horizon_len)\n",
- "\n",
- " torch.set_grad_enabled(True)\n",
- " logging_tuple = agent.update_net(buffer_items)\n",
- " torch.set_grad_enabled(False)\n",
- "\n",
- " evaluator.evaluate_and_save(agent.act, args.horizon_len, logging_tuple)\n",
- " if (evaluator.total_step > args.break_step) or os.path.exists(f\"{args.cwd}/stop\"):\n",
- " torch.save(agent.act.state_dict(), args.cwd + '/actor.pth')\n",
- " break # stop training when reach `break_step` or `mkdir cwd/stop`\n",
- "\n",
- "\n",
- "def render_agent(env_class, env_args: dict, net_dims: [int], agent_class, actor_path: str, render_times: int = 8):\n",
- " env = build_env(env_class, env_args)\n",
- "\n",
- " state_dim = env_args['state_dim']\n",
- " action_dim = env_args['action_dim']\n",
- " agent = agent_class(net_dims, state_dim, action_dim, gpu_id=-1)\n",
- " actor = agent.act\n",
- "\n",
- " print(f\"| render and load actor from: {actor_path}\")\n",
- " actor.load_state_dict(torch.load(actor_path, map_location=lambda storage, loc: storage))\n",
- " for i in range(render_times):\n",
- " cumulative_reward, episode_step = get_rewards_and_steps(env, actor, if_render=True)\n",
- " print(f\"|{i:4} cumulative_reward {cumulative_reward:9.3f} episode_step {episode_step:5.0f}\")\n",
- "\n",
- " \n",
- "class Evaluator:\n",
- " def __init__(self, eval_env, eval_per_step: int = 1e4, eval_times: int = 8, cwd: str = '.'):\n",
- " self.cwd = cwd\n",
- " self.env_eval = eval_env\n",
- " self.eval_step = 0\n",
- " self.total_step = 0\n",
- " self.start_time = time.time()\n",
- " self.eval_times = eval_times # number of times that get episodic cumulative return\n",
- " self.eval_per_step = eval_per_step # evaluate the agent per training steps\n",
- "\n",
- " self.recorder = []\n",
- " print(f\"\\n| `step`: Number of samples, or total training steps, or running times of `env.step()`.\"\n",
- " f\"\\n| `time`: Time spent from the start of training to this moment.\"\n",
- " f\"\\n| `avgR`: Average value of cumulative rewards, which is the sum of rewards in an episode.\"\n",
- " f\"\\n| `stdR`: Standard dev of cumulative rewards, which is the sum of rewards in an episode.\"\n",
- " f\"\\n| `avgS`: Average of steps in an episode.\"\n",
- " f\"\\n| `objC`: Objective of Critic network. Or call it loss function of critic network.\"\n",
- " f\"\\n| `objA`: Objective of Actor network. It is the average Q value of the critic network.\"\n",
- " f\"\\n| {'step':>8} {'time':>8} | {'avgR':>8} {'stdR':>6} {'avgS':>6} | {'objC':>8} {'objA':>8}\")\n",
- " \n",
- " def evaluate_and_save(self, actor, horizon_len: int, logging_tuple: tuple):\n",
- " self.total_step += horizon_len\n",
- " if self.eval_step + self.eval_per_step > self.total_step:\n",
- " return\n",
- " self.eval_step = self.total_step\n",
- "\n",
- " rewards_steps_ary = [get_rewards_and_steps(self.env_eval, actor) for _ in range(self.eval_times)]\n",
- " rewards_steps_ary = np.array(rewards_steps_ary, dtype=np.float32)\n",
- " avg_r = rewards_steps_ary[:, 0].mean() # average of cumulative rewards\n",
- " std_r = rewards_steps_ary[:, 0].std() # std of cumulative rewards\n",
- " avg_s = rewards_steps_ary[:, 1].mean() # average of steps in an episode\n",
- "\n",
- " used_time = time.time() - self.start_time\n",
- " self.recorder.append((self.total_step, used_time, avg_r))\n",
- " \n",
- " print(f\"| {self.total_step:8.2e} {used_time:8.0f} \"\n",
- " f\"| {avg_r:8.2f} {std_r:6.2f} {avg_s:6.0f} \"\n",
- " f\"| {logging_tuple[0]:8.2f} {logging_tuple[1]:8.2f}\")\n",
- "\n",
- "\n",
- "def get_rewards_and_steps(env, actor, if_render: bool = False) -> (float, int): # cumulative_rewards and episode_steps\n",
- " device = next(actor.parameters()).device # net.parameters() is a Python generator.\n",
- "\n",
- " state, _ = env.reset()\n",
- " episode_steps = 0\n",
- " cumulative_returns = 0.0 # sum of rewards in an episode\n",
- " for episode_steps in range(12345):\n",
- " tensor_state = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)\n",
- " tensor_action = actor(tensor_state)\n",
- " action = tensor_action.detach().cpu().numpy()[0] # not need detach(), because using torch.no_grad() outside\n",
- " state, reward, done, _, _ = env.step(action)\n",
- " cumulative_returns += reward\n",
- "\n",
- " if if_render:\n",
- " env.render()\n",
- " if done:\n",
- " break\n",
- " return cumulative_returns, episode_steps + 1"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "9tzAw9k26nAC"
- },
- "source": [
- "## DRL Agent Class"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "pwCbbocm6PHM"
- },
- "outputs": [],
- "source": [
- "from __future__ import annotations\n",
- "\n",
- "import torch\n",
- "# from elegantrl.agents import AgentA2C\n",
- "\n",
- "MODELS = {\"ppo\": AgentPPO}\n",
- "OFF_POLICY_MODELS = [\"ddpg\", \"td3\", \"sac\"]\n",
- "ON_POLICY_MODELS = [\"ppo\"]\n",
- "# MODEL_KWARGS = {x: config.__dict__[f\"{x.upper()}_PARAMS\"] for x in MODELS.keys()}\n",
- "#\n",
- "# NOISE = {\n",
- "# \"normal\": NormalActionNoise,\n",
- "# \"ornstein_uhlenbeck\": OrnsteinUhlenbeckActionNoise,\n",
- "# }\n",
- "\n",
- "\n",
- "class DRLAgent:\n",
- " \"\"\"Implementations of DRL algorithms\n",
- " Attributes\n",
- " ----------\n",
- " env: gym environment class\n",
- " user-defined class\n",
- " Methods\n",
- " -------\n",
- " get_model()\n",
- " setup DRL algorithms\n",
- " train_model()\n",
- " train DRL algorithms in a train dataset\n",
- " and output the trained model\n",
- " DRL_prediction()\n",
- " make a prediction in a test dataset and get results\n",
- " \"\"\"\n",
- "\n",
- " def __init__(self, env, price_array, tech_array, turbulence_array):\n",
- " self.env = env\n",
- " self.price_array = price_array\n",
- " self.tech_array = tech_array\n",
- " self.turbulence_array = turbulence_array\n",
- "\n",
- " def get_model(self, model_name, model_kwargs):\n",
- " env_config = {\n",
- " \"price_array\": self.price_array,\n",
- " \"tech_array\": self.tech_array,\n",
- " \"turbulence_array\": self.turbulence_array,\n",
- " \"if_train\": True,\n",
- " }\n",
- " environment = self.env(config=env_config)\n",
- " env_args = {'config': env_config,\n",
- " 'env_name': environment.env_name,\n",
- " 'state_dim': environment.state_dim,\n",
- " 'action_dim': environment.action_dim,\n",
- " 'if_discrete': False}\n",
- " agent = MODELS[model_name]\n",
- " if model_name not in MODELS:\n",
- " raise NotImplementedError(\"NotImplementedError\")\n",
- " model = Config(agent_class=agent, env_class=self.env, env_args=env_args)\n",
- " model.if_off_policy = model_name in OFF_POLICY_MODELS\n",
- " if model_kwargs is not None:\n",
- " try:\n",
- " model.learning_rate = model_kwargs[\"learning_rate\"]\n",
- " model.batch_size = model_kwargs[\"batch_size\"]\n",
- " model.gamma = model_kwargs[\"gamma\"]\n",
- " model.seed = model_kwargs[\"seed\"]\n",
- " model.net_dims = model_kwargs[\"net_dimension\"]\n",
- " model.target_step = model_kwargs[\"target_step\"]\n",
- " model.eval_gap = model_kwargs[\"eval_gap\"]\n",
- " model.eval_times = model_kwargs[\"eval_times\"]\n",
- " except BaseException:\n",
- " raise ValueError(\n",
- " \"Fail to read arguments, please check 'model_kwargs' input.\"\n",
- " )\n",
- " return model\n",
- "\n",
- " def train_model(self, model, cwd, total_timesteps=5000):\n",
- " model.cwd = cwd\n",
- " model.break_step = total_timesteps\n",
- " train_agent(model)\n",
- "\n",
- " @staticmethod\n",
- " def DRL_prediction(model_name, cwd, net_dimension, environment):\n",
- " if model_name not in MODELS:\n",
- " raise NotImplementedError(\"NotImplementedError\")\n",
- " agent_class = MODELS[model_name]\n",
- " environment.env_num = 1\n",
- " agent = agent_class(net_dimension, environment.state_dim, environment.action_dim)\n",
- " actor = agent.act\n",
- " # load agent\n",
- " try: \n",
- " cwd = cwd + '/actor.pth'\n",
- " print(f\"| load actor from: {cwd}\")\n",
- " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n",
- " act = actor\n",
- " device = agent.device\n",
- " except BaseException:\n",
- " raise ValueError(\"Fail to load agent!\")\n",
- "\n",
- " # test on the testing env\n",
- " _torch = torch\n",
- " state, _ = environment.reset()\n",
- " episode_returns = [] # the cumulative_return / initial_account\n",
- " episode_total_assets = [environment.initial_total_asset]\n",
- " with _torch.no_grad():\n",
- " for i in range(environment.max_step):\n",
- " s_tensor = _torch.as_tensor((state,), device=device)\n",
- " a_tensor = act(s_tensor) # action_tanh = act.forward()\n",
- " action = (\n",
- " a_tensor.detach().cpu().numpy()[0]\n",
- " ) # not need detach(), because with torch.no_grad() outside\n",
- " state, reward, done, _, _ = environment.step(action)\n",
- "\n",
- " total_asset = (\n",
- " environment.amount\n",
- " + (\n",
- " environment.price_ary[environment.day] * environment.stocks\n",
- " ).sum()\n",
- " )\n",
- " episode_total_assets.append(total_asset)\n",
- " episode_return = total_asset / environment.initial_total_asset\n",
- " episode_returns.append(episode_return)\n",
- " if done:\n",
- " break\n",
- " print(\"Test Finished!\")\n",
- " # return episode total_assets on testing data\n",
- " print(\"episode_return\", episode_return)\n",
- " return episode_total_assets\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zjLda8No6pvI"
- },
- "source": [
- "## Train & Test Functions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "j8-e03ev32oz"
- },
- "outputs": [],
- "source": [
- "from __future__ import annotations\n",
- "\n",
- "from finrl.meta.data_processor import DataProcessor\n",
- "\n",
- "def train(\n",
- " start_date,\n",
- " end_date,\n",
- " ticker_list,\n",
- " data_source,\n",
- " time_interval,\n",
- " technical_indicator_list,\n",
- " drl_lib,\n",
- " env,\n",
- " model_name,\n",
- " if_vix=True,\n",
- " **kwargs,\n",
- "):\n",
- " # download data\n",
- " dp = DataProcessor(data_source, **kwargs)\n",
- " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n",
- " data = dp.clean_data(data)\n",
- " data = dp.add_technical_indicator(data, technical_indicator_list)\n",
- " if if_vix:\n",
- " data = dp.add_vix(data)\n",
- " else:\n",
- " data = dp.add_turbulence(data)\n",
- " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n",
- " env_config = {\n",
- " \"price_array\": price_array,\n",
- " \"tech_array\": tech_array,\n",
- " \"turbulence_array\": turbulence_array,\n",
- " \"if_train\": True,\n",
- " }\n",
- " env_instance = env(config=env_config)\n",
- "\n",
- " # read parameters\n",
- " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n",
- "\n",
- " if drl_lib == \"elegantrl\":\n",
- " DRLAgent_erl = DRLAgent\n",
- " break_step = kwargs.get(\"break_step\", 1e6)\n",
- " erl_params = kwargs.get(\"erl_params\")\n",
- " agent = DRLAgent_erl(\n",
- " env=env,\n",
- " price_array=price_array,\n",
- " tech_array=tech_array,\n",
- " turbulence_array=turbulence_array,\n",
- " )\n",
- " model = agent.get_model(model_name, model_kwargs=erl_params)\n",
- " trained_model = agent.train_model(\n",
- " model=model, cwd=cwd, total_timesteps=break_step\n",
- " )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Evsg8QtEDHDO"
- },
- "outputs": [],
- "source": [
- "from __future__ import annotations\n",
- "\n",
- "from finrl.config import INDICATORS\n",
- "from finrl.config import RLlib_PARAMS\n",
- "from finrl.config import TEST_END_DATE\n",
- "from finrl.config import TEST_START_DATE\n",
- "from finrl.config_tickers import DOW_30_TICKER\n",
- "\n",
- "def test(\n",
- " start_date,\n",
- " end_date,\n",
- " ticker_list,\n",
- " data_source,\n",
- " time_interval,\n",
- " technical_indicator_list,\n",
- " drl_lib,\n",
- " env,\n",
- " model_name,\n",
- " if_vix=True,\n",
- " **kwargs,\n",
- "):\n",
- "\n",
- " # import data processor\n",
- " from finrl.meta.data_processor import DataProcessor\n",
- "\n",
- " # fetch data\n",
- " dp = DataProcessor(data_source, **kwargs)\n",
- " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n",
- " data = dp.clean_data(data)\n",
- " data = dp.add_technical_indicator(data, technical_indicator_list)\n",
- "\n",
- " if if_vix:\n",
- " data = dp.add_vix(data)\n",
- " else:\n",
- " data = dp.add_turbulence(data)\n",
- " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n",
- "\n",
- " env_config = {\n",
- " \"price_array\": price_array,\n",
- " \"tech_array\": tech_array,\n",
- " \"turbulence_array\": turbulence_array,\n",
- " \"if_train\": False,\n",
- " }\n",
- " env_instance = env(config=env_config)\n",
- "\n",
- " # load elegantrl needs state dim, action dim and net dim\n",
- " net_dimension = kwargs.get(\"net_dimension\", 2**7)\n",
- " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n",
- " print(\"price_array: \", len(price_array))\n",
- "\n",
- " if drl_lib == \"elegantrl\":\n",
- " DRLAgent_erl = DRLAgent\n",
- " episode_total_assets = DRLAgent_erl.DRL_prediction(\n",
- " model_name=model_name,\n",
- " cwd=cwd,\n",
- " net_dimension=net_dimension,\n",
- " environment=env_instance,\n",
- " )\n",
- " return episode_total_assets"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "pf5aVHAU-xF6"
- },
- "source": [
- "## Import Dow Jones 30 Symbols"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "jx25TA_X87F-"
- },
- "outputs": [],
- "source": [
- "ticker_list = DOW_30_TICKER\n",
- "action_dim = len(DOW_30_TICKER)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "UIV0kO_y-inG",
- "outputId": "bd7b3c21-641e-4eb7-a4af-ae7d156042a6"
- },
- "outputs": [],
- "source": [
- "print(ticker_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "CnqQ-cC5-rfO",
- "outputId": "29b248c9-ec98-44cd-befb-65192af72ea4"
- },
- "outputs": [],
- "source": [
- "print(INDICATORS)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "rZMkcyjZ-25l"
- },
- "source": [
- "## Calculate the DRL state dimension manually for paper trading"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "GLfkTsXK-e90"
- },
- "outputs": [],
- "source": [
- "# amount + (turbulence, turbulence_bool) + (price, shares, cd (holding time)) * stock_dim + tech_dim\n",
- "state_dim = 1 + 2 + 3 * action_dim + len(INDICATORS) * action_dim"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "QqUkvImG-n66",
- "outputId": "9cb4a3d8-5064-4971-d095-65d3ab12f11a"
- },
- "outputs": [],
- "source": [
- "state_dim"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "8Z6qlLXY-fA2"
- },
- "outputs": [],
- "source": [
- "env = StockTradingEnv"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "J25MuZLiGqCP"
- },
- "source": [
- "## Show the data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "puJZWm8NHtSN"
- },
- "source": [
- "### Step 1. Pick a data source"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "3ZCru8f7GqgL",
- "outputId": "010e6a83-1280-410a-e240-4bc8ec124774"
- },
- "outputs": [],
- "source": [
- "#DP = DataProcessor(data_source = 'alpaca',\n",
- "# API_KEY = API_KEY, \n",
- "# API_SECRET = API_SECRET, \n",
- "# API_BASE_URL = API_BASE_URL\n",
- "# )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "nvPEW2mYHvkR"
- },
- "source": [
- "### Step 2. Get ticker list, Set start date and end date, specify the data frequency"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "NPNxj6c8HIiE"
- },
- "outputs": [],
- "source": [
- "#data = DP.download_data(start_date = '2021-10-04', \n",
- "# end_date = '2021-10-08',\n",
- "# ticker_list = ticker_list, \n",
- "# time_interval= '1Min')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "pPcazCq1d5ec",
- "outputId": "39d61284-7b51-46c2-cc2d-424f0f569e25"
- },
- "outputs": [],
- "source": [
- "#data['timestamp'].nunique()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "i46jGdE0IAel"
- },
- "source": [
- "### Step 3. Data Cleaning & Feature Engineering"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "x9euUsEPHWFK",
- "outputId": "2ae7fae7-d9ae-4f34-f32a-13e1476debea"
- },
- "outputs": [],
- "source": [
- "#data = DP.clean_data(data)\n",
- "#data = DP.add_technical_indicator(data, INDICATORS)\n",
- "#data = DP.add_vix(data)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "GOcPTaAgHdxa",
- "outputId": "4da334de-fbf6-49ca-ed22-bf1a99469457"
- },
- "outputs": [],
- "source": [
- "#data.shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "bbu03L_UIMWt"
- },
- "source": [
- "### Step 4. Transform to numpy array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "Rzj0vjZZHdGM",
- "outputId": "d0ec43a2-b78e-4c09-c048-b88e7eba6c81"
- },
- "outputs": [],
- "source": [
- "#price_array, tech_array, turbulence_array = DP.df_to_array(data, if_vix=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "eW0UDAXI1nEa"
- },
- "source": [
- "# Part 2: Train the agent"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "lArLOFcJ7VMO"
- },
- "source": [
- "## Train"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "g1F84mebj4gu"
- },
- "outputs": [],
- "source": [
- "ERL_PARAMS = {\"learning_rate\": 3e-6,\"batch_size\": 2048,\"gamma\": 0.985,\n",
- " \"seed\":312,\"net_dimension\":[128,64], \"target_step\":5000, \"eval_gap\":30,\n",
- " \"eval_times\":1} \n",
- "env = StockTradingEnv\n",
- "#if you want to use larger datasets (change to longer period), and it raises error, \n",
- "#please try to increase \"target_step\". It should be larger than the episode steps. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "BxcNI2fdNjip",
- "outputId": "8db09736-a3a1-48a2-9e61-f9d8828ee327"
- },
- "outputs": [],
- "source": [
- "train(start_date = '2022-08-25', \n",
- " end_date = '2022-08-31',\n",
- " ticker_list = ticker_list, \n",
- " data_source = 'alpaca',\n",
- " time_interval= '1Min', \n",
- " technical_indicator_list= INDICATORS,\n",
- " drl_lib='elegantrl', \n",
- " env=env,\n",
- " model_name='ppo',\n",
- " if_vix=True, \n",
- " API_KEY = API_KEY, \n",
- " API_SECRET = API_SECRET, \n",
- " API_BASE_URL = API_BASE_URL,\n",
- " erl_params=ERL_PARAMS,\n",
- " cwd='./papertrading_erl', #current_working_dir\n",
- " break_step=1e5)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "g37WugV_1pAS"
- },
- "source": [
- "## Test"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "SxYoWCDa02TW"
- },
- "outputs": [],
- "source": [
- "account_value_erl=test(start_date = '2022-09-01', \n",
- " end_date = '2022-09-02',\n",
- " ticker_list = ticker_list, \n",
- " data_source = 'alpaca',\n",
- " time_interval= '1Min', \n",
- " technical_indicator_list= INDICATORS,\n",
- " drl_lib='elegantrl', \n",
- " env=env, \n",
- " model_name='ppo',\n",
- " if_vix=True, \n",
- " API_KEY = API_KEY, \n",
- " API_SECRET = API_SECRET, \n",
- " API_BASE_URL = API_BASE_URL,\n",
- " cwd='./papertrading_erl',\n",
- " net_dimension = ERL_PARAMS['net_dimension'])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "e8aNQ58X7avM"
- },
- "source": [
- "## Use full data to train "
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "3CQ9_Yv41r88"
- },
- "source": [
- "After tuning well, retrain on the training and testing sets"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "cUSgbwt_10V3",
- "outputId": "50f3d8c6-b333-480e-b2fb-25e566797806"
- },
- "outputs": [],
- "source": [
- "train(start_date = '2022-08-25', \n",
- " end_date = '2022-09-02',\n",
- " ticker_list = ticker_list, \n",
- " data_source = 'alpaca',\n",
- " time_interval= '1Min', \n",
- " technical_indicator_list= INDICATORS,\n",
- " drl_lib='elegantrl', \n",
- " env=env, \n",
- " model_name='ppo',\n",
- " if_vix=True, \n",
- " API_KEY = API_KEY, \n",
- " API_SECRET = API_SECRET, \n",
- " API_BASE_URL = API_BASE_URL,\n",
- " erl_params=ERL_PARAMS,\n",
- " cwd='./papertrading_erl_retrain',\n",
- " break_step=2e5)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sIQN6Ggt7gXY"
- },
- "source": [
- "# Part 3: Deploy the agent"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "UFoxkigg1zXa"
- },
- "source": [
- "## Setup Alpaca Paper trading environment"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "LpkoZpYzfneS"
- },
- "outputs": [],
- "source": [
- "import datetime\n",
- "import threading\n",
- "from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor\n",
- "import alpaca_trade_api as tradeapi\n",
- "import time\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import torch\n",
- "import gym\n",
- "\n",
- "class AlpacaPaperTrading():\n",
- "\n",
- " def __init__(self,ticker_list, time_interval, drl_lib, agent, cwd, net_dim, \n",
- " state_dim, action_dim, API_KEY, API_SECRET, \n",
- " API_BASE_URL, tech_indicator_list, turbulence_thresh=30, \n",
- " max_stock=1e2, latency = None):\n",
- " #load agent\n",
- " self.drl_lib = drl_lib\n",
- " if agent =='ppo':\n",
- " if drl_lib == 'elegantrl': \n",
- " agent_class = AgentPPO\n",
- " agent = agent_class(net_dim, state_dim, action_dim)\n",
- " actor = agent.act\n",
- " # load agent\n",
- " try: \n",
- " cwd = cwd + '/actor.pth'\n",
- " print(f\"| load actor from: {cwd}\")\n",
- " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n",
- " self.act = actor\n",
- " self.device = agent.device\n",
- " except BaseException:\n",
- " raise ValueError(\"Fail to load agent!\")\n",
- " \n",
- " elif drl_lib == 'rllib':\n",
- " from ray.rllib.agents import ppo\n",
- " from ray.rllib.agents.ppo.ppo import PPOTrainer\n",
- " \n",
- " config = ppo.DEFAULT_CONFIG.copy()\n",
- " config['env'] = StockEnvEmpty\n",
- " config[\"log_level\"] = \"WARN\"\n",
- " config['env_config'] = {'state_dim':state_dim,\n",
- " 'action_dim':action_dim,}\n",
- " trainer = PPOTrainer(env=StockEnvEmpty, config=config)\n",
- " trainer.restore(cwd)\n",
- " try:\n",
- " trainer.restore(cwd)\n",
- " self.agent = trainer\n",
- " print(\"Restoring from checkpoint path\", cwd)\n",
- " except:\n",
- " raise ValueError('Fail to load agent!')\n",
- " \n",
- " elif drl_lib == 'stable_baselines3':\n",
- " from stable_baselines3 import PPO\n",
- " \n",
- " try:\n",
- " #load agent\n",
- " self.model = PPO.load(cwd)\n",
- " print(\"Successfully load model\", cwd)\n",
- " except:\n",
- " raise ValueError('Fail to load agent!')\n",
- " \n",
- " else:\n",
- " raise ValueError('The DRL library input is NOT supported yet. Please check your input.')\n",
- " \n",
- " else:\n",
- " raise ValueError('Agent input is NOT supported yet.')\n",
- " \n",
- " \n",
- " \n",
- " #connect to Alpaca trading API\n",
- " try:\n",
- " self.alpaca = tradeapi.REST(API_KEY,API_SECRET,API_BASE_URL, 'v2')\n",
- " except:\n",
- " raise ValueError('Fail to connect Alpaca. Please check account info and internet connection.')\n",
- " \n",
- " #read trading time interval\n",
- " if time_interval == '1s':\n",
- " self.time_interval = 1\n",
- " elif time_interval == '5s':\n",
- " self.time_interval = 5\n",
- " elif time_interval == '1Min':\n",
- " self.time_interval = 60\n",
- " elif time_interval == '5Min':\n",
- " self.time_interval = 60 * 5\n",
- " elif time_interval == '15Min':\n",
- " self.time_interval = 60 * 15\n",
- " else:\n",
- " raise ValueError('Time interval input is NOT supported yet.')\n",
- " \n",
- " #read trading settings\n",
- " self.tech_indicator_list = tech_indicator_list\n",
- " self.turbulence_thresh = turbulence_thresh\n",
- " self.max_stock = max_stock \n",
- " \n",
- " #initialize account\n",
- " self.stocks = np.asarray([0] * len(ticker_list)) #stocks holding\n",
- " self.stocks_cd = np.zeros_like(self.stocks) \n",
- " self.cash = None #cash record \n",
- " self.stocks_df = pd.DataFrame(self.stocks, columns=['stocks'], index = ticker_list)\n",
- " self.asset_list = []\n",
- " self.price = np.asarray([0] * len(ticker_list))\n",
- " self.stockUniverse = ticker_list\n",
- " self.turbulence_bool = 0\n",
- " self.equities = []\n",
- " \n",
- " def test_latency(self, test_times = 10): \n",
- " total_time = 0\n",
- " for i in range(0, test_times):\n",
- " time0 = time.time()\n",
- " self.get_state()\n",
- " time1 = time.time()\n",
- " temp_time = time1 - time0\n",
- " total_time += temp_time\n",
- " latency = total_time/test_times\n",
- " print('latency for data processing: ', latency)\n",
- " return latency\n",
- " \n",
- " def run(self):\n",
- " orders = self.alpaca.list_orders(status=\"open\")\n",
- " for order in orders:\n",
- " self.alpaca.cancel_order(order.id)\n",
- " \n",
- " # Wait for market to open.\n",
- " print(\"Waiting for market to open...\")\n",
- " self.awaitMarketOpen()\n",
- " print(\"Market opened.\")\n",
- "\n",
- " while True:\n",
- "\n",
- " # Figure out when the market will close so we can prepare to sell beforehand.\n",
- " clock = self.alpaca.get_clock()\n",
- " closingTime = clock.next_close.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
- " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
- " self.timeToClose = closingTime - currTime\n",
- " \n",
- " if(self.timeToClose < (60)):\n",
- " # Close all positions when 1 minutes til market close.\n",
- " print(\"Market closing soon. Stop trading.\")\n",
- " break\n",
- " \n",
- " '''# Close all positions when 1 minutes til market close.\n",
- " print(\"Market closing soon. Closing positions.\")\n",
- "\n",
- " threads = []\n",
- " positions = self.alpaca.list_positions()\n",
- " for position in positions:\n",
- " if(position.side == 'long'):\n",
- " orderSide = 'sell'\n",
- " else:\n",
- " orderSide = 'buy'\n",
- " qty = abs(int(float(position.qty)))\n",
- " respSO = []\n",
- " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n",
- " tSubmitOrder.start()\n",
- " threads.append(tSubmitOrder) # record thread for joining later\n",
- "\n",
- " for x in threads: # wait for all threads to complete\n",
- " x.join() \n",
- " # Run script again after market close for next trading day.\n",
- " print(\"Sleeping until market close (15 minutes).\")\n",
- " time.sleep(60 * 15)'''\n",
- " \n",
- " else:\n",
- " self.trade()\n",
- " last_equity = float(self.alpaca.get_account().last_equity)\n",
- " cur_time = time.time()\n",
- " self.equities.append([cur_time,last_equity])\n",
- " time.sleep(self.time_interval)\n",
- " \n",
- " def awaitMarketOpen(self):\n",
- " isOpen = self.alpaca.get_clock().is_open\n",
- " while(not isOpen):\n",
- " clock = self.alpaca.get_clock()\n",
- " openingTime = clock.next_open.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
- " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
- " timeToOpen = int((openingTime - currTime) / 60)\n",
- " print(str(timeToOpen) + \" minutes til market open.\")\n",
- " time.sleep(60)\n",
- " isOpen = self.alpaca.get_clock().is_open\n",
- " \n",
- " def trade(self):\n",
- " state = self.get_state()\n",
- " \n",
- " if self.drl_lib == 'elegantrl':\n",
- " with torch.no_grad():\n",
- " s_tensor = torch.as_tensor((state,), device=self.device)\n",
- " a_tensor = self.act(s_tensor) \n",
- " action = a_tensor.detach().cpu().numpy()[0] \n",
- " action = (action * self.max_stock).astype(int)\n",
- " \n",
- " elif self.drl_lib == 'rllib':\n",
- " action = self.agent.compute_single_action(state)\n",
- " \n",
- " elif self.drl_lib == 'stable_baselines3':\n",
- " action = self.model.predict(state)[0]\n",
- " \n",
- " else:\n",
- " raise ValueError('The DRL library input is NOT supported yet. Please check your input.')\n",
- " \n",
- " self.stocks_cd += 1\n",
- " if self.turbulence_bool == 0:\n",
- " min_action = 10 # stock_cd\n",
- " threads = []\n",
- " for index in np.where(action < -min_action)[0]: # sell_index:\n",
- " sell_num_shares = min(self.stocks[index], -action[index])\n",
- " qty = abs(int(sell_num_shares))\n",
- " respSO = []\n",
- " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'sell', respSO))\n",
- " tSubmitOrder.start()\n",
- " threads.append(tSubmitOrder) # record thread for joining later\n",
- " self.cash = float(self.alpaca.get_account().cash)\n",
- " self.stocks_cd[index] = 0\n",
- " \n",
- " for x in threads: # wait for all threads to complete\n",
- " x.join() \n",
- "\n",
- " threads = []\n",
- " for index in np.where(action > min_action)[0]: # buy_index:\n",
- " if self.cash < 0:\n",
- " tmp_cash = 0\n",
- " else:\n",
- " tmp_cash = self.cash\n",
- " buy_num_shares = min(tmp_cash // self.price[index], abs(int(action[index])))\n",
- " if (buy_num_shares != buy_num_shares): # if buy_num_change = nan\n",
- " qty = 0 # set to 0 quantity\n",
- " else:\n",
- " qty = abs(int(buy_num_shares))\n",
- " qty = abs(int(buy_num_shares))\n",
- " respSO = []\n",
- " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'buy', respSO))\n",
- " tSubmitOrder.start()\n",
- " threads.append(tSubmitOrder) # record thread for joining later\n",
- " self.cash = float(self.alpaca.get_account().cash)\n",
- " self.stocks_cd[index] = 0\n",
- "\n",
- " for x in threads: # wait for all threads to complete\n",
- " x.join() \n",
- " \n",
- " else: # sell all when turbulence\n",
- " threads = []\n",
- " positions = self.alpaca.list_positions()\n",
- " for position in positions:\n",
- " if(position.side == 'long'):\n",
- " orderSide = 'sell'\n",
- " else:\n",
- " orderSide = 'buy'\n",
- " qty = abs(int(float(position.qty)))\n",
- " respSO = []\n",
- " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n",
- " tSubmitOrder.start()\n",
- " threads.append(tSubmitOrder) # record thread for joining later\n",
- "\n",
- " for x in threads: # wait for all threads to complete\n",
- " x.join() \n",
- " \n",
- " self.stocks_cd[:] = 0\n",
- " \n",
- " \n",
- " def get_state(self):\n",
- " alpaca = AlpacaProcessor(api=self.alpaca)\n",
- " price, tech, turbulence = alpaca.fetch_latest_data(ticker_list = self.stockUniverse, time_interval='1Min',\n",
- " tech_indicator_list=self.tech_indicator_list)\n",
- " turbulence_bool = 1 if turbulence >= self.turbulence_thresh else 0\n",
- " \n",
- " turbulence = (self.sigmoid_sign(turbulence, self.turbulence_thresh) * 2 ** -5).astype(np.float32)\n",
- " \n",
- " tech = tech * 2 ** -7\n",
- " positions = self.alpaca.list_positions()\n",
- " stocks = [0] * len(self.stockUniverse)\n",
- " for position in positions:\n",
- " ind = self.stockUniverse.index(position.symbol)\n",
- " stocks[ind] = ( abs(int(float(position.qty))))\n",
- " \n",
- " stocks = np.asarray(stocks, dtype = float)\n",
- " cash = float(self.alpaca.get_account().cash)\n",
- " self.cash = cash\n",
- " self.stocks = stocks\n",
- " self.turbulence_bool = turbulence_bool \n",
- " self.price = price\n",
- " \n",
- " \n",
- " \n",
- " amount = np.array(self.cash * (2 ** -12), dtype=np.float32)\n",
- " scale = np.array(2 ** -6, dtype=np.float32)\n",
- " state = np.hstack((amount,\n",
- " turbulence,\n",
- " self.turbulence_bool,\n",
- " price * scale,\n",
- " self.stocks * scale,\n",
- " self.stocks_cd,\n",
- " tech,\n",
- " )).astype(np.float32)\n",
- " state[np.isnan(state)] = 0.0\n",
- " state[np.isinf(state)] = 0.0\n",
- " print(len(self.stockUniverse))\n",
- " return state\n",
- " \n",
- " def submitOrder(self, qty, stock, side, resp):\n",
- " if(qty > 0):\n",
- " try:\n",
- " self.alpaca.submit_order(stock, qty, side, \"market\", \"day\")\n",
- " print(\"Market order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | completed.\")\n",
- " resp.append(True)\n",
- " except:\n",
- " print(\"Order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | did not go through.\")\n",
- " resp.append(False)\n",
- " else:\n",
- " print(\"Quantity is 0, order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | not completed.\")\n",
- " resp.append(True)\n",
- "\n",
- " @staticmethod\n",
- " def sigmoid_sign(ary, thresh):\n",
- " def sigmoid(x):\n",
- " return 1 / (1 + np.exp(-x * np.e)) - 0.5\n",
- "\n",
- " return sigmoid(ary / thresh) * thresh\n",
- " \n",
- "class StockEnvEmpty(gym.Env):\n",
- " #Empty Env used for loading rllib agent\n",
- " def __init__(self,config):\n",
- " state_dim = config['state_dim']\n",
- " action_dim = config['action_dim']\n",
- " self.env_num = 1\n",
- " self.max_step = 10000\n",
- " self.env_name = 'StockEnvEmpty'\n",
- " self.state_dim = state_dim \n",
- " self.action_dim = action_dim\n",
- " self.if_discrete = False \n",
- " self.target_return = 9999\n",
- " self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(state_dim,), dtype=np.float32)\n",
- " self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_dim,), dtype=np.float32)\n",
- " \n",
- " def reset(self):\n",
- " return \n",
- "\n",
- " def step(self, actions):\n",
- " return"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "os4C4-4H7ns7"
- },
- "source": [
- "## Run Paper trading"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "7nw0i-0UN3-7",
- "outputId": "25729df7-4775-49af-bf5a-38e3970d0056"
- },
- "outputs": [],
- "source": [
- "print(DOW_30_TICKER)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "YsSBK9ION1t6",
- "outputId": "49a69655-850f-436b-a21c-fffe48528e71"
- },
- "outputs": [],
- "source": [
- "state_dim"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "xYtSv6P1N247",
- "outputId": "174550ce-664a-41fc-bd89-9d3726960c5b"
- },
- "outputs": [],
- "source": [
- "action_dim"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Kl9nulnAJtiI"
- },
- "outputs": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "V1ofncK2cYhs"
+ },
+ "source": [
+ "Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "j3mbRu3s1YlD"
+ },
+ "source": [
+ "# Part 1: Install FinRL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0gkmsPgbvNf6"
+ },
+ "outputs": [],
+ "source": [
+ "## install finrl library\n",
+ "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3rwy7V72-8YY"
+ },
+ "source": [
+ "## Get the API Keys Ready"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "8Z6qlLXY-fA2"
+ },
+ "outputs": [],
+ "source": [
+ "API_KEY = 'API_KEY'\n",
+ "API_SECRET = 'API_SECRET'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "--6Kx8I21erH"
+ },
+ "source": [
+ "## Import related modules"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "H7I7zsyYfoLJ"
+ },
+ "outputs": [],
+ "source": [
+ "from finrl.config_tickers import DOW_30_TICKER\n",
+ "from finrl.config import INDICATORS\n",
+ "from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0EVJIQUR6_fu"
+ },
+ "source": [
+ "## PPO"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "-EYx40S84tzo"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import time\n",
+ "import gymnasium as gym\n",
+ "import numpy as np\n",
+ "import numpy.random as rd\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "from typing import List, Tuple\n",
+ "from copy import deepcopy\n",
+ "from torch import Tensor\n",
+ "from torch.distributions.normal import Normal\n",
+ "\n",
+ "\n",
+ "class ActorPPO(nn.Module):\n",
+ " def __init__(self, dims: List[int], state_dim: int, action_dim: int):\n",
+ " super().__init__()\n",
+ " self.net = build_mlp(dims=[state_dim, *dims, action_dim])\n",
+ " self.action_std_log = nn.Parameter(torch.zeros((1, action_dim)), requires_grad=True) # trainable parameter\n",
+ "\n",
+ " def forward(self, state: Tensor) -> Tensor:\n",
+ " return self.net(state).tanh() # action.tanh()\n",
+ "\n",
+ " def get_action(self, state: Tensor) -> Tuple[Tensor, Tensor]: # for exploration\n",
+ " action_avg = self.net(state)\n",
+ " action_std = self.action_std_log.exp()\n",
+ "\n",
+ " dist = Normal(action_avg, action_std)\n",
+ " action = dist.sample()\n",
+ " logprob = dist.log_prob(action).sum(1)\n",
+ " return action, logprob\n",
+ "\n",
+ " def get_logprob_entropy(self, state: Tensor, action: Tensor) -> Tuple[Tensor, Tensor]:\n",
+ " action_avg = self.net(state)\n",
+ " action_std = self.action_std_log.exp()\n",
+ "\n",
+ " dist = Normal(action_avg, action_std)\n",
+ " logprob = dist.log_prob(action).sum(1)\n",
+ " entropy = dist.entropy().sum(1)\n",
+ " return logprob, entropy\n",
+ "\n",
+ " @staticmethod\n",
+ " def convert_action_for_env(action: Tensor) -> Tensor:\n",
+ " return action.tanh()\n",
+ "\n",
+ "\n",
+ "class CriticPPO(nn.Module):\n",
+ " def __init__(self, dims: List[int], state_dim: int, _action_dim: int):\n",
+ " super().__init__()\n",
+ " self.net = build_mlp(dims=[state_dim, *dims, 1])\n",
+ "\n",
+ " def forward(self, state: Tensor) -> Tensor:\n",
+ " return self.net(state) # advantage value\n",
+ "\n",
+ "\n",
+ "def build_mlp(dims: List[int]) -> nn.Sequential: # MLP (MultiLayer Perceptron)\n",
+ " net_list = []\n",
+ " for i in range(len(dims) - 1):\n",
+ " net_list.extend([nn.Linear(dims[i], dims[i + 1]), nn.ReLU()])\n",
+ " del net_list[-1] # remove the activation of output layer\n",
+ " return nn.Sequential(*net_list)\n",
+ "\n",
+ "\n",
+ "class Config:\n",
+ " def __init__(self, agent_class=None, env_class=None, env_args=None):\n",
+ " self.env_class = env_class # env = env_class(**env_args)\n",
+ " self.env_args = env_args # env = env_class(**env_args)\n",
+ "\n",
+ " if env_args is None: # dummy env_args\n",
+ " env_args = {'env_name': None, 'state_dim': None, 'action_dim': None, 'if_discrete': None}\n",
+ " self.env_name = env_args['env_name'] # the name of environment. Be used to set 'cwd'.\n",
+ " self.state_dim = env_args['state_dim'] # vector dimension (feature number) of state\n",
+ " self.action_dim = env_args['action_dim'] # vector dimension (feature number) of action\n",
+ " self.if_discrete = env_args['if_discrete'] # discrete or continuous action space\n",
+ "\n",
+ " self.agent_class = agent_class # agent = agent_class(...)\n",
+ "\n",
+ " '''Arguments for reward shaping'''\n",
+ " self.gamma = 0.99 # discount factor of future rewards\n",
+ " self.reward_scale = 1.0 # an approximate target reward usually be closed to 256\n",
+ "\n",
+ " '''Arguments for training'''\n",
+ " self.gpu_id = int(0) # `int` means the ID of single GPU, -1 means CPU\n",
+ " self.net_dims = (64, 32) # the middle layer dimension of MLP (MultiLayer Perceptron)\n",
+ " self.learning_rate = 6e-5 # 2 ** -14 ~= 6e-5\n",
+ " self.soft_update_tau = 5e-3 # 2 ** -8 ~= 5e-3\n",
+ " self.batch_size = int(128) # num of transitions sampled from replay buffer.\n",
+ " self.horizon_len = int(2000) # collect horizon_len step while exploring, then update network\n",
+ " self.buffer_size = None # ReplayBuffer size. Empty the ReplayBuffer for on-policy.\n",
+ " self.repeat_times = 8.0 # repeatedly update network using ReplayBuffer to keep critic's loss small\n",
+ "\n",
+ " '''Arguments for evaluate'''\n",
+ " self.cwd = None # current working directory to save model. None means set automatically\n",
+ " self.break_step = +np.inf # break training if 'total_step > break_step'\n",
+ " self.eval_times = int(32) # number of times that get episodic cumulative return\n",
+ " self.eval_per_step = int(2e4) # evaluate the agent per training steps\n",
+ "\n",
+ " def init_before_training(self):\n",
+ " if self.cwd is None: # set cwd (current working directory) for saving model\n",
+ " self.cwd = f'./{self.env_name}_{self.agent_class.__name__[5:]}'\n",
+ " os.makedirs(self.cwd, exist_ok=True)\n",
+ "\n",
+ "\n",
+ "def get_gym_env_args(env, if_print: bool) -> dict:\n",
+ " if {'unwrapped', 'observation_space', 'action_space', 'spec'}.issubset(dir(env)): # isinstance(env, gym.Env):\n",
+ " env_name = env.unwrapped.spec.id\n",
+ " state_shape = env.observation_space.shape\n",
+ " state_dim = state_shape[0] if len(state_shape) == 1 else state_shape # sometimes state_dim is a list\n",
+ "\n",
+ " if_discrete = isinstance(env.action_space, gym.spaces.Discrete)\n",
+ " if if_discrete: # make sure it is discrete action space\n",
+ " action_dim = env.action_space.n\n",
+ " elif isinstance(env.action_space, gym.spaces.Box): # make sure it is continuous action space\n",
+ " action_dim = env.action_space.shape[0]\n",
+ "\n",
+ " env_args = {'env_name': env_name, 'state_dim': state_dim, 'action_dim': action_dim, 'if_discrete': if_discrete}\n",
+ " print(f\"env_args = {repr(env_args)}\") if if_print else None\n",
+ " return env_args\n",
+ "\n",
+ "\n",
+ "def kwargs_filter(function, kwargs: dict) -> dict:\n",
+ " import inspect\n",
+ " sign = inspect.signature(function).parameters.values()\n",
+ " sign = {val.name for val in sign}\n",
+ " common_args = sign.intersection(kwargs.keys())\n",
+ " return {key: kwargs[key] for key in common_args} # filtered kwargs\n",
+ "\n",
+ "\n",
+ "def build_env(env_class=None, env_args=None):\n",
+ " if env_class.__module__ == 'gym.envs.registration': # special rule\n",
+ " env = env_class(id=env_args['env_name'])\n",
+ " else:\n",
+ " env = env_class(**kwargs_filter(env_class.__init__, env_args.copy()))\n",
+ " for attr_str in ('env_name', 'state_dim', 'action_dim', 'if_discrete'):\n",
+ " setattr(env, attr_str, env_args[attr_str])\n",
+ " return env\n",
+ "\n",
+ "\n",
+ "class AgentBase:\n",
+ " def __init__(self, net_dims: List[int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n",
+ " self.state_dim = state_dim\n",
+ " self.action_dim = action_dim\n",
+ "\n",
+ " self.gamma = args.gamma\n",
+ " self.batch_size = args.batch_size\n",
+ " self.repeat_times = args.repeat_times\n",
+ " self.reward_scale = args.reward_scale\n",
+ " self.soft_update_tau = args.soft_update_tau\n",
+ "\n",
+ " self.states = None # assert self.states == (1, state_dim)\n",
+ " self.device = torch.device(f\"cuda:{gpu_id}\" if (torch.cuda.is_available() and (gpu_id >= 0)) else \"cpu\")\n",
+ "\n",
+ " act_class = getattr(self, \"act_class\", None)\n",
+ " cri_class = getattr(self, \"cri_class\", None)\n",
+ " self.act = self.act_target = act_class(net_dims, state_dim, action_dim).to(self.device)\n",
+ " self.cri = self.cri_target = cri_class(net_dims, state_dim, action_dim).to(self.device) \\\n",
+ " if cri_class else self.act\n",
+ "\n",
+ " self.act_optimizer = torch.optim.Adam(self.act.parameters(), args.learning_rate)\n",
+ " self.cri_optimizer = torch.optim.Adam(self.cri.parameters(), args.learning_rate) \\\n",
+ " if cri_class else self.act_optimizer\n",
+ "\n",
+ " self.criterion = torch.nn.SmoothL1Loss()\n",
+ "\n",
+ " @staticmethod\n",
+ " def optimizer_update(optimizer, objective: Tensor):\n",
+ " optimizer.zero_grad()\n",
+ " objective.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " @staticmethod\n",
+ " def soft_update(target_net: torch.nn.Module, current_net: torch.nn.Module, tau: float):\n",
+ " for tar, cur in zip(target_net.parameters(), current_net.parameters()):\n",
+ " tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))\n",
+ "\n",
+ "\n",
+ "class AgentPPO(AgentBase):\n",
+ " def __init__(self, net_dims: List[int], state_dim: int, action_dim: int, gpu_id: int = 0, args: Config = Config()):\n",
+ " self.if_off_policy = False\n",
+ " self.act_class = getattr(self, \"act_class\", ActorPPO)\n",
+ " self.cri_class = getattr(self, \"cri_class\", CriticPPO)\n",
+ " AgentBase.__init__(self, net_dims, state_dim, action_dim, gpu_id, args)\n",
+ "\n",
+ " self.ratio_clip = getattr(args, \"ratio_clip\", 0.25) # `ratio.clamp(1 - clip, 1 + clip)`\n",
+ " self.lambda_gae_adv = getattr(args, \"lambda_gae_adv\", 0.95) # could be 0.80~0.99\n",
+ " self.lambda_entropy = getattr(args, \"lambda_entropy\", 0.01) # could be 0.00~0.10\n",
+ " self.lambda_entropy = torch.tensor(self.lambda_entropy, dtype=torch.float32, device=self.device)\n",
+ "\n",
+ " def explore_env(self, env, horizon_len: int) -> List[Tensor]:\n",
+ " states = torch.zeros((horizon_len, self.state_dim), dtype=torch.float32).to(self.device)\n",
+ " actions = torch.zeros((horizon_len, self.action_dim), dtype=torch.float32).to(self.device)\n",
+ " logprobs = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n",
+ " rewards = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)\n",
+ " dones = torch.zeros(horizon_len, dtype=torch.bool).to(self.device)\n",
+ "\n",
+ " ary_state = self.states[0]\n",
+ "\n",
+ " get_action = self.act.get_action\n",
+ " convert = self.act.convert_action_for_env\n",
+ " for i in range(horizon_len):\n",
+ " state = torch.as_tensor(ary_state, dtype=torch.float32, device=self.device)\n",
+ " action, logprob = [t.squeeze(0) for t in get_action(state.unsqueeze(0))[:2]]\n",
+ "\n",
+ " ary_action = convert(action).detach().cpu().numpy()\n",
+ " ary_state, reward, done, _, _ = env.step(ary_action)\n",
+ " if done:\n",
+ " ary_state, _ = env.reset()\n",
+ "\n",
+ " states[i] = state\n",
+ " actions[i] = action\n",
+ " logprobs[i] = logprob\n",
+ " rewards[i] = reward\n",
+ " dones[i] = done\n",
+ "\n",
+ " self.states[0] = ary_state\n",
+ " rewards = (rewards * self.reward_scale).unsqueeze(1)\n",
+ " undones = (1 - dones.type(torch.float32)).unsqueeze(1)\n",
+ " return states, actions, logprobs, rewards, undones\n",
+ "\n",
+ " def update_net(self, buffer) -> List[float]:\n",
+ " with torch.no_grad():\n",
+ " states, actions, logprobs, rewards, undones = buffer\n",
+ " buffer_size = states.shape[0]\n",
+ "\n",
+ " '''get advantages reward_sums'''\n",
+ " bs = 2 ** 10 # set a smaller 'batch_size' when out of GPU memory.\n",
+ " values = [self.cri(states[i:i + bs]) for i in range(0, buffer_size, bs)]\n",
+ " values = torch.cat(values, dim=0).squeeze(1) # values.shape == (buffer_size, )\n",
+ "\n",
+ " advantages = self.get_advantages(rewards, undones, values) # advantages.shape == (buffer_size, )\n",
+ " reward_sums = advantages + values # reward_sums.shape == (buffer_size, )\n",
+ " del rewards, undones, values\n",
+ "\n",
+ " advantages = (advantages - advantages.mean()) / (advantages.std(dim=0) + 1e-5)\n",
+ " assert logprobs.shape == advantages.shape == reward_sums.shape == (buffer_size,)\n",
+ "\n",
+ " '''update network'''\n",
+ " obj_critics = 0.0\n",
+ " obj_actors = 0.0\n",
+ "\n",
+ " update_times = int(buffer_size * self.repeat_times / self.batch_size)\n",
+ " assert update_times >= 1\n",
+ " for _ in range(update_times):\n",
+ " indices = torch.randint(buffer_size, size=(self.batch_size,), requires_grad=False)\n",
+ " state = states[indices]\n",
+ " action = actions[indices]\n",
+ " logprob = logprobs[indices]\n",
+ " advantage = advantages[indices]\n",
+ " reward_sum = reward_sums[indices]\n",
+ "\n",
+ " value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state\n",
+ " obj_critic = self.criterion(value, reward_sum)\n",
+ " self.optimizer_update(self.cri_optimizer, obj_critic)\n",
+ "\n",
+ " new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)\n",
+ " ratio = (new_logprob - logprob.detach()).exp()\n",
+ " surrogate1 = advantage * ratio\n",
+ " surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)\n",
+ " obj_surrogate = torch.min(surrogate1, surrogate2).mean()\n",
+ "\n",
+ " obj_actor = obj_surrogate + obj_entropy.mean() * self.lambda_entropy\n",
+ " self.optimizer_update(self.act_optimizer, -obj_actor)\n",
+ "\n",
+ " obj_critics += obj_critic.item()\n",
+ " obj_actors += obj_actor.item()\n",
+ " a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1)).mean()\n",
+ " return obj_critics / update_times, obj_actors / update_times, a_std_log.item()\n",
+ "\n",
+ " def get_advantages(self, rewards: Tensor, undones: Tensor, values: Tensor) -> Tensor:\n",
+ " advantages = torch.empty_like(values) # advantage value\n",
+ "\n",
+ " masks = undones * self.gamma\n",
+ " horizon_len = rewards.shape[0]\n",
+ "\n",
+ " next_state = torch.tensor(self.states, dtype=torch.float32).to(self.device)\n",
+ " next_value = self.cri(next_state).detach()[0, 0]\n",
+ "\n",
+ " advantage = 0 # last_gae_lambda\n",
+ " for t in range(horizon_len - 1, -1, -1):\n",
+ " delta = rewards[t] + masks[t] * next_value - values[t]\n",
+ " advantages[t] = advantage = delta + masks[t] * self.lambda_gae_adv * advantage\n",
+ " next_value = values[t]\n",
+ " return advantages\n",
+ "\n",
+ "\n",
+ "class PendulumEnv(gym.Wrapper): # a demo of custom gym env\n",
+ " def __init__(self):\n",
+ " gym_env_name = \"Pendulum-v1\"\n",
+ " super().__init__(env=gym.make(gym_env_name))\n",
+ "\n",
+ " '''the necessary env information when you design a custom env'''\n",
+ " self.env_name = gym_env_name # the name of this env.\n",
+ " self.state_dim = self.observation_space.shape[0] # feature number of state\n",
+ " self.action_dim = self.action_space.shape[0] # feature number of action\n",
+ " self.if_discrete = False # discrete action or continuous action\n",
+ "\n",
+ " def reset(self) -> np.ndarray: # reset the agent in env\n",
+ " resetted_env, _ = self.env.reset()\n",
+ " return resetted_env\n",
+ "\n",
+ " def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: # agent interacts in env\n",
+ " # We suggest that adjust action space to (-1, +1) when designing a custom env.\n",
+ " state, reward, done, info_dict, _ = self.env.step(action * 2)\n",
+ " return state.reshape(self.state_dim), float(reward), done, info_dict\n",
+ "\n",
+ "\n",
+ "def train_agent(args: Config):\n",
+ " args.init_before_training()\n",
+ "\n",
+ " env = build_env(args.env_class, args.env_args)\n",
+ " agent = args.agent_class(args.net_dims, args.state_dim, args.action_dim, gpu_id=args.gpu_id, args=args)\n",
+ "\n",
+ " new_env, _ = env.reset()\n",
+ " agent.states = new_env[np.newaxis, :]\n",
+ "\n",
+ " evaluator = Evaluator(eval_env=build_env(args.env_class, args.env_args),\n",
+ " eval_per_step=args.eval_per_step,\n",
+ " eval_times=args.eval_times,\n",
+ " cwd=args.cwd)\n",
+ " torch.set_grad_enabled(False)\n",
+ " while True: # start training\n",
+ " buffer_items = agent.explore_env(env, args.horizon_len)\n",
+ "\n",
+ " torch.set_grad_enabled(True)\n",
+ " logging_tuple = agent.update_net(buffer_items)\n",
+ " torch.set_grad_enabled(False)\n",
+ "\n",
+ " evaluator.evaluate_and_save(agent.act, args.horizon_len, logging_tuple)\n",
+ " if (evaluator.total_step > args.break_step) or os.path.exists(f\"{args.cwd}/stop\"):\n",
+ " torch.save(agent.act.state_dict(), args.cwd + '/actor.pth')\n",
+ " break # stop training when reach `break_step` or `mkdir cwd/stop`\n",
+ "\n",
+ "\n",
+ "def render_agent(env_class, env_args: dict, net_dims: List[int], agent_class, actor_path: str, render_times: int = 8):\n",
+ " env = build_env(env_class, env_args)\n",
+ "\n",
+ " state_dim = env_args['state_dim']\n",
+ " action_dim = env_args['action_dim']\n",
+ " agent = agent_class(net_dims, state_dim, action_dim, gpu_id=-1)\n",
+ " actor = agent.act\n",
+ "\n",
+ " print(f\"| render and load actor from: {actor_path}\")\n",
+ " actor.load_state_dict(torch.load(actor_path, map_location=lambda storage, loc: storage))\n",
+ " for i in range(render_times):\n",
+ " cumulative_reward, episode_step = get_rewards_and_steps(env, actor, if_render=True)\n",
+ " print(f\"|{i:4} cumulative_reward {cumulative_reward:9.3f} episode_step {episode_step:5.0f}\")\n",
+ "\n",
+ "\n",
+ "class Evaluator:\n",
+ " def __init__(self, eval_env, eval_per_step: int = 1e4, eval_times: int = 8, cwd: str = '.'):\n",
+ " self.cwd = cwd\n",
+ " self.env_eval = eval_env\n",
+ " self.eval_step = 0\n",
+ " self.total_step = 0\n",
+ " self.start_time = time.time()\n",
+ " self.eval_times = eval_times # number of times that get episodic cumulative return\n",
+ " self.eval_per_step = eval_per_step # evaluate the agent per training steps\n",
+ "\n",
+ " self.recorder = []\n",
+ " print(f\"\\n| `step`: Number of samples, or total training steps, or running times of `env.step()`.\"\n",
+ " f\"\\n| `time`: Time spent from the start of training to this moment.\"\n",
+ " f\"\\n| `avgR`: Average value of cumulative rewards, which is the sum of rewards in an episode.\"\n",
+ " f\"\\n| `stdR`: Standard dev of cumulative rewards, which is the sum of rewards in an episode.\"\n",
+ " f\"\\n| `avgS`: Average of steps in an episode.\"\n",
+ " f\"\\n| `objC`: Objective of Critic network. Or call it loss function of critic network.\"\n",
+ " f\"\\n| `objA`: Objective of Actor network. It is the average Q value of the critic network.\"\n",
+ " f\"\\n| {'step':>8} {'time':>8} | {'avgR':>8} {'stdR':>6} {'avgS':>6} | {'objC':>8} {'objA':>8}\")\n",
+ "\n",
+ " def evaluate_and_save(self, actor, horizon_len: int, logging_tuple: tuple):\n",
+ " self.total_step += horizon_len\n",
+ " if self.eval_step + self.eval_per_step > self.total_step:\n",
+ " return\n",
+ " self.eval_step = self.total_step\n",
+ "\n",
+ " rewards_steps_ary = [get_rewards_and_steps(self.env_eval, actor) for _ in range(self.eval_times)]\n",
+ " rewards_steps_ary = np.array(rewards_steps_ary, dtype=np.float32)\n",
+ " avg_r = rewards_steps_ary[:, 0].mean() # average of cumulative rewards\n",
+ " std_r = rewards_steps_ary[:, 0].std() # std of cumulative rewards\n",
+ " avg_s = rewards_steps_ary[:, 1].mean() # average of steps in an episode\n",
+ "\n",
+ " used_time = time.time() - self.start_time\n",
+ " self.recorder.append((self.total_step, used_time, avg_r))\n",
+ "\n",
+ " print(f\"| {self.total_step:8.2e} {used_time:8.0f} \"\n",
+ " f\"| {avg_r:8.2f} {std_r:6.2f} {avg_s:6.0f} \"\n",
+ " f\"| {logging_tuple[0]:8.2f} {logging_tuple[1]:8.2f}\")\n",
+ "\n",
+ "\n",
+ "def get_rewards_and_steps(env, actor, if_render: bool = False) -> Tuple[float, int]: # cumulative_rewards and episode_steps\n",
+ " device = next(actor.parameters()).device # net.parameters() is a Python generator.\n",
+ "\n",
+ " state, _ = env.reset()\n",
+ " episode_steps = 0\n",
+ " cumulative_returns = 0.0 # sum of rewards in an episode\n",
+ " for episode_steps in range(12345):\n",
+ " tensor_state = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)\n",
+ " tensor_action = actor(tensor_state)\n",
+ " action = tensor_action.detach().cpu().numpy()[0] # not need detach(), because using torch.no_grad() outside\n",
+ " state, reward, done, _, _ = env.step(action)\n",
+ " cumulative_returns += reward\n",
+ "\n",
+ " if if_render:\n",
+ " env.render()\n",
+ " if done:\n",
+ " break\n",
+ " return cumulative_returns, episode_steps + 1"
+ ]
+ },
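+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, `AgentPPO.update_net` above maximizes the clipped surrogate objective with an entropy bonus,\n",
+ "\n",
+ "$$L(\\theta) = \\mathbb{E}_t\\left[\\min\\left(r_t(\\theta)\\,\\hat{A}_t,\\ \\mathrm{clip}(r_t(\\theta),\\,1-\\epsilon,\\,1+\\epsilon)\\,\\hat{A}_t\\right)\\right] + \\lambda_{\\mathrm{entropy}}\\,\\mathbb{E}_t[\\mathcal{H}_t],$$\n",
+ "\n",
+ "where $r_t(\\theta) = \\exp\\big(\\log \\pi_\\theta(a_t \\mid s_t) - \\log \\pi_{\\mathrm{old}}(a_t \\mid s_t)\\big)$ and $\\epsilon$ is `ratio_clip` (0.25 here). The advantages $\\hat{A}_t$ come from `get_advantages` via GAE: $\\delta_t = r_t + \\gamma V(s_{t+1}) - V(s_t)$ and $\\hat{A}_t = \\delta_t + \\gamma \\lambda \\hat{A}_{t+1}$, with $\\lambda$ = `lambda_gae_adv`."
+ ]
+ },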
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9tzAw9k26nAC"
+ },
+ "source": [
+ "## DRL Agent Class"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "pwCbbocm6PHM"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import annotations\n",
+ "\n",
+ "import torch\n",
+ "# from elegantrl.agents import AgentA2C\n",
+ "\n",
+ "MODELS = {\"ppo\": AgentPPO}\n",
+ "OFF_POLICY_MODELS = [\"ddpg\", \"td3\", \"sac\"]\n",
+ "ON_POLICY_MODELS = [\"ppo\"]\n",
+ "# MODEL_KWARGS = {x: config.__dict__[f\"{x.upper()}_PARAMS\"] for x in MODELS.keys()}\n",
+ "#\n",
+ "# NOISE = {\n",
+ "# \"normal\": NormalActionNoise,\n",
+ "# \"ornstein_uhlenbeck\": OrnsteinUhlenbeckActionNoise,\n",
+ "# }\n",
+ "\n",
+ "\n",
+ "class DRLAgent:\n",
+ " \"\"\"Implementations of DRL algorithms\n",
+ " Attributes\n",
+ " ----------\n",
+ " env: gym environment class\n",
+ " user-defined class\n",
+ " Methods\n",
+ " -------\n",
+ " get_model()\n",
+ " setup DRL algorithms\n",
+ " train_model()\n",
+ " train DRL algorithms in a train dataset\n",
+ " and output the trained model\n",
+ " DRL_prediction()\n",
+ " make a prediction in a test dataset and get results\n",
+ " \"\"\"\n",
+ "\n",
+ " def __init__(self, env, price_array, tech_array, turbulence_array):\n",
+ " self.env = env\n",
+ " self.price_array = price_array\n",
+ " self.tech_array = tech_array\n",
+ " self.turbulence_array = turbulence_array\n",
+ "\n",
+ " def get_model(self, model_name, model_kwargs):\n",
+ " env_config = {\n",
+ " \"price_array\": self.price_array,\n",
+ " \"tech_array\": self.tech_array,\n",
+ " \"turbulence_array\": self.turbulence_array,\n",
+ " \"if_train\": True,\n",
+ " }\n",
+ " environment = self.env(config=env_config)\n",
+ " env_args = {'config': env_config,\n",
+ " 'env_name': environment.env_name,\n",
+ " 'state_dim': environment.state_dim,\n",
+ " 'action_dim': environment.action_dim,\n",
+ " 'if_discrete': False}\n",
+ " agent = MODELS[model_name]\n",
+ " if model_name not in MODELS:\n",
+ " raise NotImplementedError(\"NotImplementedError\")\n",
+ " model = Config(agent_class=agent, env_class=self.env, env_args=env_args)\n",
+ " model.if_off_policy = model_name in OFF_POLICY_MODELS\n",
+ " if model_kwargs is not None:\n",
+ " try:\n",
+ " model.learning_rate = model_kwargs[\"learning_rate\"]\n",
+ " model.batch_size = model_kwargs[\"batch_size\"]\n",
+ " model.gamma = model_kwargs[\"gamma\"]\n",
+ " model.seed = model_kwargs[\"seed\"]\n",
+ " model.net_dims = model_kwargs[\"net_dimension\"]\n",
+ " model.target_step = model_kwargs[\"target_step\"]\n",
+ " model.eval_gap = model_kwargs[\"eval_gap\"]\n",
+ " model.eval_times = model_kwargs[\"eval_times\"]\n",
+ " except BaseException:\n",
+ " raise ValueError(\n",
+ " \"Fail to read arguments, please check 'model_kwargs' input.\"\n",
+ " )\n",
+ " return model\n",
+ "\n",
+ " def train_model(self, model, cwd, total_timesteps=5000):\n",
+ " model.cwd = cwd\n",
+ " model.break_step = total_timesteps\n",
+ " train_agent(model)\n",
+ "\n",
+ " @staticmethod\n",
+ " def DRL_prediction(model_name, cwd, net_dimension, environment):\n",
+ " if model_name not in MODELS:\n",
+ " raise NotImplementedError(\"NotImplementedError\")\n",
+ " agent_class = MODELS[model_name]\n",
+ " environment.env_num = 1\n",
+ " agent = agent_class(net_dimension, environment.state_dim, environment.action_dim)\n",
+ " actor = agent.act\n",
+ " # load agent\n",
+ " try:\n",
+ " cwd = cwd + '/actor.pth'\n",
+ " print(f\"| load actor from: {cwd}\")\n",
+ " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n",
+ " act = actor\n",
+ " device = agent.device\n",
+ " except BaseException:\n",
+ " raise ValueError(\"Fail to load agent!\")\n",
+ "\n",
+ " # test on the testing env\n",
+ " _torch = torch\n",
+ " state, _ = environment.reset()\n",
+ " episode_returns = [] # the cumulative_return / initial_account\n",
+ " episode_total_assets = [environment.initial_total_asset]\n",
+ " with _torch.no_grad():\n",
+ " for i in range(environment.max_step):\n",
+ " s_tensor = _torch.as_tensor((state,), device=device)\n",
+ " a_tensor = act(s_tensor) # action_tanh = act.forward()\n",
+ " action = (\n",
+ " a_tensor.detach().cpu().numpy()[0]\n",
+ " ) # not need detach(), because with torch.no_grad() outside\n",
+ " state, reward, done, _, _ = environment.step(action)\n",
+ "\n",
+ " total_asset = (\n",
+ " environment.amount\n",
+ " + (\n",
+ " environment.price_ary[environment.day] * environment.stocks\n",
+ " ).sum()\n",
+ " )\n",
+ " episode_total_assets.append(total_asset)\n",
+ " episode_return = total_asset / environment.initial_total_asset\n",
+ " episode_returns.append(episode_return)\n",
+ " if done:\n",
+ " break\n",
+ " print(\"Test Finished!\")\n",
+ " # return episode total_assets on testing data\n",
+ " print(\"episode_return\", episode_return)\n",
+ " return episode_total_assets\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zjLda8No6pvI"
+ },
+ "source": [
+ "## Train & Test Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "j8-e03ev32oz"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import annotations\n",
+ "\n",
+ "from finrl.meta.data_processor import DataProcessor\n",
+ "\n",
+ "def train(\n",
+ " start_date,\n",
+ " end_date,\n",
+ " ticker_list,\n",
+ " data_source,\n",
+ " time_interval,\n",
+ " technical_indicator_list,\n",
+ " drl_lib,\n",
+ " env,\n",
+ " model_name,\n",
+ " if_vix=True,\n",
+ " **kwargs,\n",
+ "):\n",
+ " # download data\n",
+ " dp = DataProcessor(data_source, **kwargs)\n",
+ " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n",
+ " data = dp.clean_data(data)\n",
+ " data = dp.add_technical_indicator(data, technical_indicator_list)\n",
+ " if if_vix:\n",
+ " data = dp.add_vix(data)\n",
+ " else:\n",
+ " data = dp.add_turbulence(data)\n",
+ " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n",
+ " env_config = {\n",
+ " \"price_array\": price_array,\n",
+ " \"tech_array\": tech_array,\n",
+ " \"turbulence_array\": turbulence_array,\n",
+ " \"if_train\": True,\n",
+ " }\n",
+ " env_instance = env(config=env_config)\n",
+ "\n",
+ " # read parameters\n",
+ " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n",
+ "\n",
+ " if drl_lib == \"elegantrl\":\n",
+ " DRLAgent_erl = DRLAgent\n",
+ " break_step = kwargs.get(\"break_step\", 1e6)\n",
+ " erl_params = kwargs.get(\"erl_params\")\n",
+ " agent = DRLAgent_erl(\n",
+ " env=env,\n",
+ " price_array=price_array,\n",
+ " tech_array=tech_array,\n",
+ " turbulence_array=turbulence_array,\n",
+ " )\n",
+ " model = agent.get_model(model_name, model_kwargs=erl_params)\n",
+ " trained_model = agent.train_model(\n",
+ " model=model, cwd=cwd, total_timesteps=break_step\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Evsg8QtEDHDO"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import annotations\n",
+ "\n",
+ "from finrl.config import INDICATORS\n",
+ "from finrl.config import RLlib_PARAMS\n",
+ "from finrl.config import TEST_END_DATE\n",
+ "from finrl.config import TEST_START_DATE\n",
+ "from finrl.config_tickers import DOW_30_TICKER\n",
+ "\n",
+ "def test(\n",
+ " start_date,\n",
+ " end_date,\n",
+ " ticker_list,\n",
+ " data_source,\n",
+ " time_interval,\n",
+ " technical_indicator_list,\n",
+ " drl_lib,\n",
+ " env,\n",
+ " model_name,\n",
+ " if_vix=True,\n",
+ " **kwargs,\n",
+ "):\n",
+ "\n",
+ " # import data processor\n",
+ " from finrl.meta.data_processor import DataProcessor\n",
+ "\n",
+ " # fetch data\n",
+ " dp = DataProcessor(data_source, **kwargs)\n",
+ " data = dp.download_data(ticker_list, start_date, end_date, time_interval)\n",
+ " data = dp.clean_data(data)\n",
+ " data = dp.add_technical_indicator(data, technical_indicator_list)\n",
+ "\n",
+ " if if_vix:\n",
+ " data = dp.add_vix(data)\n",
+ " else:\n",
+ " data = dp.add_turbulence(data)\n",
+ " price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)\n",
+ "\n",
+ " env_config = {\n",
+ " \"price_array\": price_array,\n",
+ " \"tech_array\": tech_array,\n",
+ " \"turbulence_array\": turbulence_array,\n",
+ " \"if_train\": False,\n",
+ " }\n",
+ " env_instance = env(config=env_config)\n",
+ "\n",
+ " # load elegantrl needs state dim, action dim and net dim\n",
+ " net_dimension = kwargs.get(\"net_dimension\", 2**7)\n",
+ " cwd = kwargs.get(\"cwd\", \"./\" + str(model_name))\n",
+ " print(\"price_array: \", len(price_array))\n",
+ "\n",
+ " if drl_lib == \"elegantrl\":\n",
+ " DRLAgent_erl = DRLAgent\n",
+ " episode_total_assets = DRLAgent_erl.DRL_prediction(\n",
+ " model_name=model_name,\n",
+ " cwd=cwd,\n",
+ " net_dimension=net_dimension,\n",
+ " environment=env_instance,\n",
+ " )\n",
+ " return episode_total_assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "pf5aVHAU-xF6"
+ },
+ "source": [
+ "## Import Dow Jones 30 Symbols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "jx25TA_X87F-"
+ },
+ "outputs": [],
+ "source": [
+ "ticker_list = DOW_30_TICKER\n",
+ "action_dim = len(DOW_30_TICKER)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "UIV0kO_y-inG"
+ },
+ "outputs": [],
+ "source": [
+ "print(ticker_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "CnqQ-cC5-rfO"
+ },
+ "outputs": [],
+ "source": [
+ "print(INDICATORS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rZMkcyjZ-25l"
+ },
+ "source": [
+ "## Calculate the DRL state dimension manually for paper trading"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1660 minutes til market open.\n",
- "1659 minutes til market open.\n",
- "1658 minutes til market open.\n",
- "1657 minutes til market open.\n",
- "1656 minutes til market open.\n"
- ]
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "GLfkTsXK-e90"
+ },
+ "outputs": [],
+ "source": [
+ "# amount + (turbulence, turbulence_bool) + (price, shares, cd (holding time)) * stock_dim + tech_dim\n",
+ "state_dim = 1 + 2 + 3 * action_dim + len(INDICATORS) * action_dim"
+ ]
+ },
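+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check (assuming the 30 Dow tickers and the default 8 indicators in `INDICATORS`): state_dim = 1 + 2 + 3 * 30 + 8 * 30 = 333."
+ ]
+ },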
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "QqUkvImG-n66"
+ },
+ "outputs": [],
+ "source": [
+ "state_dim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "vdaI0oJ9B1Xk"
+ },
+ "outputs": [],
+ "source": [
+ "env = StockTradingEnv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "J25MuZLiGqCP"
+ },
+ "source": [
+ "## Show the data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "puJZWm8NHtSN"
+ },
+ "source": [
+ "### Step 1. Pick a data source"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "3ZCru8f7GqgL"
+ },
+ "outputs": [],
+ "source": [
+ "# DP = DataProcessor(data_source = 'alpaca',\n",
+ "# API_KEY = API_KEY,\n",
+ "# API_SECRET = API_SECRET,\n",
+ "# API_BASE_URL = API_BASE_URL\n",
+ "# )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nvPEW2mYHvkR"
+ },
+ "source": [
+ "### Step 2. Get ticker list, Set start date and end date, specify the data frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "NPNxj6c8HIiE"
+ },
+ "outputs": [],
+ "source": [
+ "# data = DP.download_data(start_date = '2021-10-04',\n",
+ "# end_date = '2021-10-08',\n",
+ "# ticker_list = ticker_list,\n",
+ "# time_interval= '1Min')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "pPcazCq1d5ec"
+ },
+ "outputs": [],
+ "source": [
+ "# data['timestamp'].nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "i46jGdE0IAel"
+ },
+ "source": [
+ "### Step 3. Data Cleaning & Feature Engineering"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "x9euUsEPHWFK"
+ },
+ "outputs": [],
+ "source": [
+ "# data = DP.clean_data(data)\n",
+ "# data = DP.add_technical_indicator(data, INDICATORS)\n",
+ "# data = DP.add_vix(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "GOcPTaAgHdxa"
+ },
+ "outputs": [],
+ "source": [
+ "# data.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bbu03L_UIMWt"
+ },
+ "source": [
+ "### Step 4. Transform to numpy array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Rzj0vjZZHdGM"
+ },
+ "outputs": [],
+ "source": [
+ "# price_array, tech_array, turbulence_array = DP.df_to_array(data, if_vix=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eW0UDAXI1nEa"
+ },
+ "source": [
+ "# Part 2: Train the agent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "lArLOFcJ7VMO"
+ },
+ "source": [
+ "## Train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "g1F84mebj4gu"
+ },
+ "outputs": [],
+ "source": [
+ "ERL_PARAMS = {\"learning_rate\": 3e-6,\"batch_size\": 2048,\"gamma\": 0.985,\n",
+ " \"seed\":312,\"net_dimension\":[128,64], \"target_step\":5000, \"eval_gap\":30,\n",
+ " \"eval_times\":1}\n",
+ "env = StockTradingEnv\n",
+ "# if you want to use larger datasets (change to longer period), and it raises error,\n",
+ "# please try to increase \"target_step\". It should be larger than the episode steps."
+ ]
+ },
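+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Rough arithmetic for the note above: a regular US trading session has about 390 one-minute bars (6.5 hours * 60), so a training window of roughly five trading days gives on the order of 390 * 5 = 1950 steps per episode; `target_step=5000` comfortably exceeds that."
+ ]
+ },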
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "BxcNI2fdNjip"
+ },
+ "outputs": [],
+ "source": [
+ "train(start_date = '2022-08-25',\n",
+ " end_date = '2022-08-31',\n",
+ " ticker_list = ticker_list,\n",
+ " data_source = 'alpaca',\n",
+ " time_interval= '1Min',\n",
+ " technical_indicator_list= INDICATORS,\n",
+ " drl_lib='elegantrl',\n",
+ " env=env,\n",
+ " model_name='ppo',\n",
+ " if_vix=True,\n",
+ " API_KEY = API_KEY,\n",
+ " API_SECRET = API_SECRET,\n",
+ " erl_params=ERL_PARAMS,\n",
+ " cwd='./papertrading_erl', #current_working_dir\n",
+ " break_step=1e5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "g37WugV_1pAS"
+ },
+ "source": [
+ "## Test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "SxYoWCDa02TW"
+ },
+ "outputs": [],
+ "source": [
+ "account_value_erl=test(start_date = '2022-09-01',\n",
+ " end_date = '2022-09-02',\n",
+ " ticker_list = ticker_list,\n",
+ " data_source = 'alpaca',\n",
+ " time_interval= '1Min',\n",
+ " technical_indicator_list= INDICATORS,\n",
+ " drl_lib='elegantrl',\n",
+ " env=env,\n",
+ " model_name='ppo',\n",
+ " if_vix=True,\n",
+ " API_KEY = API_KEY,\n",
+ " API_SECRET = API_SECRET,\n",
+ " cwd='./papertrading_erl',\n",
+ " net_dimension = ERL_PARAMS['net_dimension'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "e8aNQ58X7avM"
+ },
+ "source": [
+ "## Use full data to train"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3CQ9_Yv41r88"
+ },
+ "source": [
+ "After tuning well, retrain on the training and testing sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "cUSgbwt_10V3"
+ },
+ "outputs": [],
+ "source": [
+ "train(start_date = '2022-08-25',\n",
+ " end_date = '2022-09-02',\n",
+ " ticker_list = ticker_list,\n",
+ " data_source = 'alpaca',\n",
+ " time_interval= '1Min',\n",
+ " technical_indicator_list= INDICATORS,\n",
+ " drl_lib='elegantrl',\n",
+ " env=env,\n",
+ " model_name='ppo',\n",
+ " if_vix=True,\n",
+ " API_KEY = API_KEY,\n",
+ " API_SECRET = API_SECRET,\n",
+ " erl_params=ERL_PARAMS,\n",
+ " cwd='./papertrading_erl_retrain',\n",
+ " break_step=2e5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sIQN6Ggt7gXY"
+ },
+ "source": [
+ "# Part 3: Deploy the agent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UFoxkigg1zXa"
+ },
+ "source": [
+ "## Setup Alpaca Paper trading environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "LpkoZpYzfneS"
+ },
+ "outputs": [],
+ "source": [
+ "import datetime\n",
+ "import threading\n",
+ "from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor\n",
+ "from alpaca.trading.client import TradingClient\n",
+ "from alpaca.trading.requests import GetOrdersRequest\n",
+ "from alpaca.trading.enums import OrderSide, QueryOrderStatus\n",
+ "import time\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "import gymnasium as gym\n",
+ "\n",
+ "class AlpacaPaperTrading():\n",
+ "\n",
+ " def __init__(self,ticker_list, time_interval, drl_lib, agent, cwd, net_dim,\n",
+ " state_dim, action_dim, API_KEY, API_SECRET,\n",
+ " tech_indicator_list, turbulence_thresh=30,\n",
+ " max_stock=1e2, latency = None):\n",
+ " #load agent\n",
+ " self.drl_lib = drl_lib\n",
+ " if agent =='ppo':\n",
+ " if drl_lib == 'elegantrl':\n",
+ " agent_class = AgentPPO\n",
+ " agent = agent_class(net_dim, state_dim, action_dim)\n",
+ " actor = agent.act\n",
+ " # load agent\n",
+ " try:\n",
+ " cwd = cwd + '/actor.pth'\n",
+ " print(f\"| load actor from: {cwd}\")\n",
+ " actor.load_state_dict(torch.load(cwd, map_location=lambda storage, loc: storage))\n",
+ " self.act = actor\n",
+ " self.device = agent.device\n",
+ " except BaseException:\n",
+ " raise ValueError(\"Fail to load agent!\")\n",
+ "\n",
+ " elif drl_lib == 'rllib':\n",
+ " from ray.rllib.agents import ppo\n",
+ " from ray.rllib.agents.ppo.ppo import PPOTrainer\n",
+ "\n",
+ " config = ppo.DEFAULT_CONFIG.copy()\n",
+ " config['env'] = StockEnvEmpty\n",
+ " config[\"log_level\"] = \"WARN\"\n",
+ " config['env_config'] = {'state_dim':state_dim,\n",
+ " 'action_dim':action_dim,}\n",
+ " trainer = PPOTrainer(env=StockEnvEmpty, config=config)\n",
+ " trainer.restore(cwd)\n",
+ " try:\n",
+ " trainer.restore(cwd)\n",
+ " self.agent = trainer\n",
+ " print(\"Restoring from checkpoint path\", cwd)\n",
+ " except:\n",
+ " raise ValueError('Fail to load agent!')\n",
+ "\n",
+ " elif drl_lib == 'stable_baselines3':\n",
+ " from stable_baselines3 import PPO\n",
+ "\n",
+ " try:\n",
+ " #load agent\n",
+ " self.model = PPO.load(cwd)\n",
+ " print(\"Successfully load model\", cwd)\n",
+ " except:\n",
+ " raise ValueError('Fail to load agent!')\n",
+ "\n",
+ " else:\n",
+ " raise ValueError('The DRL library input is NOT supported yet. Please check your input.')\n",
+ "\n",
+ " else:\n",
+ " raise ValueError('Agent input is NOT supported yet.')\n",
+ "\n",
+ "\n",
+ "\n",
+ " #connect to Alpaca trading API\n",
+ " try:\n",
+ " self.alpaca = TradingClient(\n",
+ " api_key=API_KEY, secret_key=API_SECRET, paper=True\n",
+ " )\n",
+ " except:\n",
+ " raise ValueError('Fail to connect Alpaca. Please check account info and internet connection.')\n",
+ "\n",
+ " #read trading time interval\n",
+ " if time_interval == '1s':\n",
+ " self.time_interval = 1\n",
+ " elif time_interval == '5s':\n",
+ " self.time_interval = 5\n",
+ " elif time_interval == '1Min':\n",
+ " self.time_interval = 60\n",
+ " elif time_interval == '5Min':\n",
+ " self.time_interval = 60 * 5\n",
+ " elif time_interval == '15Min':\n",
+ " self.time_interval = 60 * 15\n",
+ " else:\n",
+ " raise ValueError('Time interval input is NOT supported yet.')\n",
+ "\n",
+ " #read trading settings\n",
+ " self.tech_indicator_list = tech_indicator_list\n",
+ " self.turbulence_thresh = turbulence_thresh\n",
+ " self.max_stock = max_stock\n",
+ "\n",
+ " #initialize account\n",
+ " self.stocks = np.asarray([0] * len(ticker_list)) #stocks holding\n",
+ " self.stocks_cd = np.zeros_like(self.stocks)\n",
+ " self.cash = None #cash record\n",
+ " self.stocks_df = pd.DataFrame(self.stocks, columns=['stocks'], index = ticker_list)\n",
+ " self.asset_list = []\n",
+ " self.price = np.asarray([0] * len(ticker_list))\n",
+ " self.stockUniverse = ticker_list\n",
+ " self.turbulence_bool = 0\n",
+ " self.equities = []\n",
+ "\n",
+ " def test_latency(self, test_times = 10):\n",
+ " total_time = 0\n",
+ " for i in range(0, test_times):\n",
+ " time0 = time.time()\n",
+ " self.get_state()\n",
+ " time1 = time.time()\n",
+ " temp_time = time1 - time0\n",
+ " total_time += temp_time\n",
+ " latency = total_time/test_times\n",
+ " print('latency for data processing: ', latency)\n",
+ " return latency\n",
+ "\n",
+ " def run(self):\n",
+ " # params to filter orders by\n",
+ " request_params = GetOrdersRequest(\n",
+ " status=QueryOrderStatus.OPEN\n",
+ " )\n",
+ "\n",
+ " # orders that satisfy params\n",
+ " orders = self.alpaca.get_orders(filter=request_params)\n",
+ " for order in orders:\n",
+ " self.alpaca.cancel_order(order.id)\n",
+ "\n",
+ " # Wait for market to open.\n",
+ " print(\"Waiting for market to open...\")\n",
+ " self.awaitMarketOpen()\n",
+ " print(\"Market opened.\")\n",
+ "\n",
+ " while True:\n",
+ "\n",
+ " # Figure out when the market will close so we can prepare to sell beforehand.\n",
+ " clock = self.alpaca.get_clock()\n",
+ " closingTime = clock.next_close.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
+ " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
+ " self.timeToClose = closingTime - currTime\n",
+ "\n",
+ " if(self.timeToClose < (60)):\n",
+ " # Close all positions when 1 minutes til market close.\n",
+ " print(\"Market closing soon. Stop trading.\")\n",
+ " break\n",
+ "\n",
+ " '''# Close all positions when 1 minutes til market close.\n",
+ " print(\"Market closing soon. Closing positions.\")\n",
+ "\n",
+ " threads = []\n",
+ " positions = self.alpaca..get_all_positions()\n",
+ " for position in positions:\n",
+ " if(position.side == 'long'):\n",
+ " orderSide = 'sell'\n",
+ " else:\n",
+ " orderSide = 'buy'\n",
+ " qty = abs(int(float(position.qty)))\n",
+ " respSO = []\n",
+ " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n",
+ " tSubmitOrder.start()\n",
+ " threads.append(tSubmitOrder) # record thread for joining later\n",
+ "\n",
+ " for x in threads: # wait for all threads to complete\n",
+ " x.join()\n",
+ " # Run script again after market close for next trading day.\n",
+ " print(\"Sleeping until market close (15 minutes).\")\n",
+ " time.sleep(60 * 15)'''\n",
+ "\n",
+ " else:\n",
+ " self.trade()\n",
+ " last_equity = float(self.alpaca.get_account().last_equity)\n",
+ " cur_time = time.time()\n",
+ " self.equities.append([cur_time,last_equity])\n",
+ " time.sleep(self.time_interval)\n",
+ "\n",
+ " def awaitMarketOpen(self):\n",
+ " isOpen = self.alpaca.get_clock().is_open\n",
+ " while(not isOpen):\n",
+ " clock = self.alpaca.get_clock()\n",
+ " openingTime = clock.next_open.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
+ " currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
+ " timeToOpen = int((openingTime - currTime) / 60)\n",
+ " print(str(timeToOpen) + \" minutes til market open.\")\n",
+ " time.sleep(60)\n",
+ " isOpen = self.alpaca.get_clock().is_open\n",
+ "\n",
+ " def trade(self):\n",
+ " state = self.get_state()\n",
+ "\n",
+ " if self.drl_lib == 'elegantrl':\n",
+ " with torch.no_grad():\n",
+ " s_tensor = torch.as_tensor((state,), device=self.device)\n",
+ " a_tensor = self.act(s_tensor)\n",
+ " action = a_tensor.detach().cpu().numpy()[0]\n",
+ " action = (action * self.max_stock).astype(int)\n",
+ "\n",
+ " elif self.drl_lib == 'rllib':\n",
+ " action = self.agent.compute_single_action(state)\n",
+ "\n",
+ " elif self.drl_lib == 'stable_baselines3':\n",
+ " action = self.model.predict(state)[0]\n",
+ "\n",
+ " else:\n",
+ " raise ValueError('The DRL library input is NOT supported yet. Please check your input.')\n",
+ "\n",
+ " self.stocks_cd += 1\n",
+ " if self.turbulence_bool == 0:\n",
+ " min_action = 10 # stock_cd\n",
+ " threads = []\n",
+ " for index in np.where(action < -min_action)[0]: # sell_index:\n",
+ " sell_num_shares = min(self.stocks[index], -action[index])\n",
+ " qty = abs(int(sell_num_shares))\n",
+ " respSO = []\n",
+ " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'sell', respSO))\n",
+ " tSubmitOrder.start()\n",
+ " threads.append(tSubmitOrder) # record thread for joining later\n",
+ " self.cash = float(self.alpaca.get_account().cash)\n",
+ " self.stocks_cd[index] = 0\n",
+ "\n",
+ " for x in threads: # wait for all threads to complete\n",
+ " x.join()\n",
+ "\n",
+ " threads = []\n",
+ " for index in np.where(action > min_action)[0]: # buy_index:\n",
+ " if self.cash < 0:\n",
+ " tmp_cash = 0\n",
+ " else:\n",
+ " tmp_cash = self.cash\n",
+ " buy_num_shares = min(tmp_cash // self.price[index], abs(int(action[index])))\n",
+ " if (buy_num_shares != buy_num_shares): # if buy_num_change = nan\n",
+ " qty = 0 # set to 0 quantity\n",
+ " else:\n",
+ " qty = abs(int(buy_num_shares))\n",
+ " qty = abs(int(buy_num_shares))\n",
+ " respSO = []\n",
+ " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'buy', respSO))\n",
+ " tSubmitOrder.start()\n",
+ " threads.append(tSubmitOrder) # record thread for joining later\n",
+ " self.cash = float(self.alpaca.get_account().cash)\n",
+ " self.stocks_cd[index] = 0\n",
+ "\n",
+ " for x in threads: # wait for all threads to complete\n",
+ " x.join()\n",
+ "\n",
+ " else: # sell all when turbulence\n",
+ " threads = []\n",
+ " positions = self.alpaca.get_all_positions()\n",
+ " for position in positions:\n",
+ " if(position.side == 'long'):\n",
+ " orderSide = 'sell'\n",
+ " else:\n",
+ " orderSide = 'buy'\n",
+ " qty = abs(int(float(position.qty)))\n",
+ " respSO = []\n",
+ " tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))\n",
+ " tSubmitOrder.start()\n",
+ " threads.append(tSubmitOrder) # record thread for joining later\n",
+ "\n",
+ " for x in threads: # wait for all threads to complete\n",
+ " x.join()\n",
+ "\n",
+ " self.stocks_cd[:] = 0\n",
+ "\n",
+ "\n",
+ " def get_state(self):\n",
+ " alpaca = AlpacaProcessor(API_KEY=API_KEY, API_SECRET=API_SECRET)\n",
+ " price, tech, turbulence = alpaca.fetch_latest_data(ticker_list = self.stockUniverse, time_interval='1Min',\n",
+ " tech_indicator_list=self.tech_indicator_list)\n",
+ " turbulence_bool = 1 if turbulence >= self.turbulence_thresh else 0\n",
+ "\n",
+ " turbulence = (self.sigmoid_sign(turbulence, self.turbulence_thresh) * 2 ** -5).astype(np.float32)\n",
+ "\n",
+ " tech = tech * 2 ** -7\n",
+ " positions = self.alpaca.get_all_positions()\n",
+ " stocks = [0] * len(self.stockUniverse)\n",
+ " for position in positions:\n",
+ " ind = self.stockUniverse.index(position.symbol)\n",
+ " stocks[ind] = ( abs(int(float(position.qty))))\n",
+ "\n",
+ " stocks = np.asarray(stocks, dtype = float)\n",
+ " cash = float(self.alpaca.get_account().cash)\n",
+ " self.cash = cash\n",
+ " self.stocks = stocks\n",
+ " self.turbulence_bool = turbulence_bool\n",
+ " self.price = price\n",
+ "\n",
+ "\n",
+ "\n",
+ " amount = np.array(self.cash * (2 ** -12), dtype=np.float32)\n",
+ " scale = np.array(2 ** -6, dtype=np.float32)\n",
+ " state = np.hstack((amount,\n",
+ " turbulence,\n",
+ " self.turbulence_bool,\n",
+ " price * scale,\n",
+ " self.stocks * scale,\n",
+ " self.stocks_cd,\n",
+ " tech,\n",
+ " )).astype(np.float32)\n",
+ " state[np.isnan(state)] = 0.0\n",
+ " state[np.isinf(state)] = 0.0\n",
+ " print(len(self.stockUniverse))\n",
+ " return state\n",
+ "\n",
+ " def submitOrder(self, qty, stock, side, resp):\n",
+ " if(qty > 0):\n",
+ " try:\n",
+ " self.alpaca.submit_order(stock, qty, side, \"market\", \"day\")\n",
+ " print(\"Market order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | completed.\")\n",
+ " resp.append(True)\n",
+ " except:\n",
+ " print(\"Order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | did not go through.\")\n",
+ " resp.append(False)\n",
+ " else:\n",
+ " print(\"Quantity is 0, order of | \" + str(qty) + \" \" + stock + \" \" + side + \" | not completed.\")\n",
+ " resp.append(True)\n",
+ "\n",
+ " @staticmethod\n",
+ " def sigmoid_sign(ary, thresh):\n",
+ " def sigmoid(x):\n",
+ " return 1 / (1 + np.exp(-x * np.e)) - 0.5\n",
+ "\n",
+ " return sigmoid(ary / thresh) * thresh\n",
+ "\n",
+ "class StockEnvEmpty(gym.Env):\n",
+ " #Empty Env used for loading rllib agent\n",
+ " def __init__(self,config):\n",
+ " state_dim = config['state_dim']\n",
+ " action_dim = config['action_dim']\n",
+ " self.env_num = 1\n",
+ " self.max_step = 10000\n",
+ " self.env_name = 'StockEnvEmpty'\n",
+ " self.state_dim = state_dim\n",
+ " self.action_dim = action_dim\n",
+ " self.if_discrete = False\n",
+ " self.target_return = 9999\n",
+ " self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(state_dim,), dtype=np.float32)\n",
+ " self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_dim,), dtype=np.float32)\n",
+ "\n",
+ " def reset(self):\n",
+ " return\n",
+ "\n",
+ " def step(self, actions):\n",
+ " return"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "os4C4-4H7ns7"
+ },
+ "source": [
+ "## Run Paper trading"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "7nw0i-0UN3-7"
+ },
+ "outputs": [],
+ "source": [
+ "print(DOW_30_TICKER)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "YsSBK9ION1t6"
+ },
+ "outputs": [],
+ "source": [
+ "state_dim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xYtSv6P1N247"
+ },
+ "outputs": [],
+ "source": [
+ "action_dim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Kl9nulnAJtiI"
+ },
+ "outputs": [],
+ "source": [
+ "paper_trading_erl = AlpacaPaperTrading(ticker_list = DOW_30_TICKER,\n",
+ " time_interval = '1Min',\n",
+ " drl_lib = 'elegantrl',\n",
+ " agent = 'ppo',\n",
+ " cwd = './papertrading_erl_retrain',\n",
+ " net_dim = ERL_PARAMS['net_dimension'],\n",
+ " state_dim = state_dim,\n",
+ " action_dim = action_dim,\n",
+ " API_KEY = API_KEY,\n",
+ " API_SECRET = API_SECRET,\n",
+ " tech_indicator_list = INDICATORS,\n",
+ " turbulence_thresh=30,\n",
+ " max_stock=1e2)\n",
+ "paper_trading_erl.run()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "srzBZfYEUI1O"
+ },
+ "source": [
+ "# Part 4: Check Portfolio Performance"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "chovN1UhTAht"
+ },
+ "outputs": [],
+ "source": [
+ "from alpaca.trading.client import TradingClient\n",
+ "from alpaca.trading.models import TradeAccount, PortfolioHistory\n",
+ "from alpaca.trading.requests import GetOrdersRequest, GetPortfolioHistoryRequest\n",
+ "import pandas_market_calendars as tc\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import pytz\n",
+ "import yfinance as yf\n",
+ "import matplotlib.ticker as ticker\n",
+ "import matplotlib.dates as mdates\n",
+ "from datetime import datetime as dt\n",
+ "from finrl.plot import backtest_stats\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "CaofxMNCfAR1"
+ },
+ "outputs": [],
+ "source": [
+ "def get_trading_days(start, end):\n",
+ " nyse = tc.get_calendar('NYSE')\n",
+ " df = nyse.date_range_htf(\"1D\", pd.Timestamp(start), pd.Timestamp(end))\n",
+ " # df = nyse.sessions_in_range(pd.Timestamp(start),\n",
+ " # pd.Timestamp(end))\n",
+ " trading_days = []\n",
+ " for day in df:\n",
+ " trading_days.append(str(day)[:10])\n",
+ "\n",
+ " return trading_days\n",
+ "\n",
+ "def alpaca_history(key, secret, start, end):\n",
+ " trading_client = TradingClient(key, secret, paper=True)\n",
+ " trade_account = trading_client.get_account()\n",
+ " trading_days = get_trading_days(start, end)\n",
+ " df = pd.DataFrame()\n",
+ " for day in trading_days:\n",
+ " portfoil_history_request = GetPortfolioHistoryRequest(start=day, timeframe='1D')\n",
+ " portfoil_history = trading_client.get_portfolio_history(history_filter = portfoil_history_request)\n",
+ " # Create a DataFrame from the relevant parts of the JSON\n",
+ " df = pd.DataFrame({\n",
+ " 'timestamp': portfoil_history.timestamp,\n",
+ " 'equity': portfoil_history.equity,\n",
+ " 'profit_loss': portfoil_history.profit_loss,\n",
+ " 'profit_loss_pct': portfoil_history.profit_loss_pct\n",
+ " })\n",
+ "\n",
+ " # Convert the 'timestamp' column to a readable datetime format\n",
+ " df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')\n",
+ " # df= pd.concat([df, portfoil_history.df.iloc[:78]],ignore_index=True)\n",
+ "\n",
+ " equities = df.equity.values\n",
+ " cumu_returns = equities/equities[0]\n",
+ " cumu_returns = cumu_returns[~np.isnan(cumu_returns)]\n",
+ "\n",
+ " return df, cumu_returns\n",
+ "\n",
+ "def DIA_history(start):\n",
+ " data_df = yf.download(['^DJI'],start=start, interval=\"5m\")\n",
+ " data_df = data_df.iloc[:]\n",
+ " baseline_returns = data_df['Close'].values/data_df['Close'].values[0]\n",
+ " return data_df, baseline_returns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5CHiZRVpURpx"
+ },
+ "source": [
+ "## Get cumulative return"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "O_YT7v-LSdfV"
+ },
+ "outputs": [],
+ "source": [
+ "df_erl, cumu_erl = alpaca_history(key=API_KEY,\n",
+ " secret=API_SECRET,\n",
+ " start='2025-09-01', #must be within 1 month\n",
+ " end='2025-09-10') #change the date if error occurs\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "IMcQjwHOS6Zb"
+ },
+ "outputs": [],
+ "source": [
+ "df_djia, cumu_djia = DIA_history(start='2025-09-01')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "PJXPwmx9Ts5o"
+ },
+ "outputs": [],
+ "source": [
+ "df_erl.tail()"
+ ]
+ },
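+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Optionally, the `backtest_stats` helper imported from `finrl.plot` above can summarize the equity curve. The sketch below assumes it expects a 'date' column and an 'account_value' column; adjust the column names to match your FinRL version before uncommenting."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional sketch (column names are assumptions; see the note above).\n",
+ "# stats_df = df_erl[['timestamp', 'equity']].rename(columns={'timestamp': 'date', 'equity': 'account_value'})\n",
+ "# perf_stats = backtest_stats(stats_df, value_col_name='account_value')\n",
+ "# print(perf_stats)"
+ ]
+ },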
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "o1Iaw90FTNfU"
+ },
+ "outputs": [],
+ "source": [
+ "returns_erl = cumu_erl -1\n",
+ "returns_dia = cumu_djia - 1\n",
+ "returns_dia = returns_dia[:returns_erl.shape[0]]\n",
+ "print('len of erl return: ', returns_erl.shape[0])\n",
+ "print('len of dia return: ', returns_dia.shape[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2IawaMsDwZni"
+ },
+ "outputs": [],
+ "source": [
+ "returns_erl"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5Z0LEm7KUZ5W"
+ },
+ "source": [
+ "## plot and save"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Foqk1wIQTQJ3"
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "plt.figure(dpi=1000)\n",
+ "plt.grid()\n",
+ "plt.grid(which='minor', axis='y')\n",
+ "plt.title('Stock Trading (Paper trading)', fontsize=20)\n",
+ "plt.plot(returns_erl, label = 'ElegantRL Agent', color = 'red')\n",
+ "#plt.plot(returns_sb3, label = 'Stable-Baselines3 Agent', color = 'blue' )\n",
+ "#plt.plot(returns_rllib, label = 'RLlib Agent', color = 'green')\n",
+ "plt.plot(returns_dia, label = 'DJIA', color = 'grey')\n",
+ "plt.ylabel('Return', fontsize=16)\n",
+ "plt.xlabel('Year 2021', fontsize=16)\n",
+ "plt.xticks(size = 14)\n",
+ "plt.yticks(size = 14)\n",
+ "ax = plt.gca()\n",
+ "ax.xaxis.set_major_locator(ticker.MultipleLocator(78))\n",
+ "ax.xaxis.set_minor_locator(ticker.MultipleLocator(6))\n",
+ "ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.005))\n",
+ "ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=2))\n",
+ "ax.xaxis.set_major_formatter(ticker.FixedFormatter(['','10-19','','10-20',\n",
+ " '','10-21','','10-22']))\n",
+ "plt.legend(fontsize=10.5)\n",
+ "plt.savefig('papertrading_stock.png')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "O_LsHVj_TZGL"
+ },
+ "outputs": [],
+ "source": []
}
- ],
- "source": [
- "paper_trading_erl = AlpacaPaperTrading(ticker_list = DOW_30_TICKER, \n",
- " time_interval = '1Min', \n",
- " drl_lib = 'elegantrl', \n",
- " agent = 'ppo', \n",
- " cwd = './papertrading_erl_retrain', \n",
- " net_dim = ERL_PARAMS['net_dimension'], \n",
- " state_dim = state_dim, \n",
- " action_dim= action_dim, \n",
- " API_KEY = API_KEY, \n",
- " API_SECRET = API_SECRET, \n",
- " API_BASE_URL = API_BASE_URL, \n",
- " tech_indicator_list = INDICATORS, \n",
- " turbulence_thresh=30, \n",
- " max_stock=1e2)\n",
- "paper_trading_erl.run()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "srzBZfYEUI1O"
- },
- "source": [
- "# Part 4: Check Portfolio Performance"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "chovN1UhTAht"
- },
- "outputs": [],
- "source": [
- "import alpaca_trade_api as tradeapi\n",
- "import pandas_market_calendars as tc\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import pytz\n",
- "import yfinance as yf\n",
- "import matplotlib.ticker as ticker\n",
- "import matplotlib.dates as mdates\n",
- "from datetime import datetime as dt\n",
- "from finrl.plot import backtest_stats\n",
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "CaofxMNCfAR1"
- },
- "outputs": [],
- "source": [
- "def get_trading_days(start, end):\n",
- " nyse = tc.get_calendar('NYSE')\n",
- " df = nyse.date_range_htf(\"1D\", pd.Timestamp(start), pd.Timestamp(end))\n",
- " # df = nyse.sessions_in_range(pd.Timestamp(start),\n",
- " # pd.Timestamp(end))\n",
- " trading_days = []\n",
- " for day in df:\n",
- " trading_days.append(str(day)[:10])\n",
- "\n",
- " return trading_days\n",
- "\n",
- "def alpaca_history(key, secret, url, start, end):\n",
- " api = tradeapi.REST(key, secret, url, 'v2')\n",
- " trading_days = get_trading_days(start, end)\n",
- " df = pd.DataFrame()\n",
- " for day in trading_days:\n",
- " #df = df.append(api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78])\n",
- " df= pd.concat([df,api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78]],ignore_index=True)\n",
- " \n",
- " equities = df.equity.values\n",
- " cumu_returns = equities/equities[0]\n",
- " cumu_returns = cumu_returns[~np.isnan(cumu_returns)]\n",
- " \n",
- " return df, cumu_returns\n",
- "\n",
- "def DIA_history(start):\n",
- " data_df = yf.download(['^DJI'],start=start, interval=\"5m\")\n",
- " data_df = data_df.iloc[:]\n",
- " baseline_returns = data_df['Adj Close'].values/data_df['Adj Close'].values[0]\n",
- " return data_df, baseline_returns"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "5CHiZRVpURpx"
- },
- "source": [
- "## Get cumulative return"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "O_YT7v-LSdfV"
- },
- "outputs": [],
- "source": [
- "df_erl, cumu_erl = alpaca_history(key=API_KEY, \n",
- " secret=API_SECRET, \n",
- " url=API_BASE_URL, \n",
- " start='2022-09-01', #must be within 1 month\n",
- " end='2022-09-12') #change the date if error occurs\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "IMcQjwHOS6Zb",
- "outputId": "1fb21460-1da9-4998-f0c0-fcbf5b056e66"
- },
- "outputs": [],
- "source": [
- "df_djia, cumu_djia = DIA_history(start='2022-09-01')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 238
- },
- "id": "PJXPwmx9Ts5o",
- "outputId": "c59014eb-c2f9-4be2-8a87-7892cc0b1094"
- },
- "outputs": [],
- "source": [
- "df_erl.tail()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
+ ],
+ "metadata": {
"colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "o1Iaw90FTNfU",
- "outputId": "0629dca2-d9dd-4c2a-e363-dc0f01daba41"
- },
- "outputs": [],
- "source": [
- "returns_erl = cumu_erl -1 \n",
- "returns_dia = cumu_djia - 1\n",
- "returns_dia = returns_dia[:returns_erl.shape[0]]\n",
- "print('len of erl return: ', returns_erl.shape[0])\n",
- "print('len of dia return: ', returns_dia.shape[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "2IawaMsDwZni"
- },
- "outputs": [],
- "source": [
- "returns_erl"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "5Z0LEm7KUZ5W"
- },
- "source": [
- "## plot and save"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Foqk1wIQTQJ3"
- },
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "plt.figure(dpi=1000)\n",
- "plt.grid()\n",
- "plt.grid(which='minor', axis='y')\n",
- "plt.title('Stock Trading (Paper trading)', fontsize=20)\n",
- "plt.plot(returns_erl, label = 'ElegantRL Agent', color = 'red')\n",
- "#plt.plot(returns_sb3, label = 'Stable-Baselines3 Agent', color = 'blue' )\n",
- "#plt.plot(returns_rllib, label = 'RLlib Agent', color = 'green')\n",
- "plt.plot(returns_dia, label = 'DJIA', color = 'grey')\n",
- "plt.ylabel('Return', fontsize=16)\n",
- "plt.xlabel('Year 2021', fontsize=16)\n",
- "plt.xticks(size = 14)\n",
- "plt.yticks(size = 14)\n",
- "ax = plt.gca()\n",
- "ax.xaxis.set_major_locator(ticker.MultipleLocator(78))\n",
- "ax.xaxis.set_minor_locator(ticker.MultipleLocator(6))\n",
- "ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.005))\n",
- "ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=2))\n",
- "ax.xaxis.set_major_formatter(ticker.FixedFormatter(['','10-19','','10-20',\n",
- " '','10-21','','10-22']))\n",
- "plt.legend(fontsize=10.5)\n",
- "plt.savefig('papertrading_stock.png')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "O_LsHVj_TZGL"
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "colab": {
- "collapsed_sections": [
- "0EVJIQUR6_fu",
- "9tzAw9k26nAC",
- "zjLda8No6pvI",
- "pf5aVHAU-xF6",
- "rZMkcyjZ-25l",
- "3rwy7V72-8YY",
- "J25MuZLiGqCP",
- "eW0UDAXI1nEa",
- "UFoxkigg1zXa"
- ],
- "provenance": []
- },
- "gpuClass": "standard",
- "kernelspec": {
- "display_name": "Python 3.8.10 ('venv': venv)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.11"
+ "collapsed_sections": [
+ "0EVJIQUR6_fu",
+ "9tzAw9k26nAC",
+ "zjLda8No6pvI",
+ "pf5aVHAU-xF6",
+ "rZMkcyjZ-25l",
+ "3rwy7V72-8YY",
+ "J25MuZLiGqCP",
+ "eW0UDAXI1nEa",
+ "UFoxkigg1zXa"
+ ],
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "gpuClass": "standard",
+ "kernelspec": {
+ "display_name": "Python 3.8.10 ('venv': venv)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "005d14239094016f48a03a57365c4ccb734e3f38c20ed0ca595d84f773bc39cd"
+ }
+ }
},
- "vscode": {
- "interpreter": {
- "hash": "005d14239094016f48a03a57365c4ccb734e3f38c20ed0ca595d84f773bc39cd"
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat": 4,
+ "nbformat_minor": 0
}
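
The notebook's performance-check cells above are rewritten against the new alpaca-py helpers. As a rough guide, here is a minimal sketch of how that cell might call the updated alpaca_history signature (no url argument); it assumes the helpers are imported from finrl.meta.paper_trading.common, whereas the notebook may instead redefine them inline:

    # Sketch only: assumes the revised helpers in finrl.meta.paper_trading.common.
    from finrl.meta.paper_trading.common import DIA_history, alpaca_history

    # The url argument is gone; the TradingClient-based helper needs only the keys.
    df_erl, cumu_erl = alpaca_history(
        key=API_KEY,
        secret=API_SECRET,
        start="2022-09-01",  # keep the window within recent paper-trading activity
        end="2022-09-12",
    )
    df_djia, cumu_djia = DIA_history(start="2022-09-01")

    returns_erl = cumu_erl - 1
    returns_dia = cumu_djia - 1
    returns_dia = returns_dia[: returns_erl.shape[0]]  # align series lengths for plotting
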
diff --git a/examples/Stock_NeurIPS2018_SB3.ipynb b/examples/Stock_NeurIPS2018_SB3.ipynb
index 903e196d05..48f0e16695 100644
--- a/examples/Stock_NeurIPS2018_SB3.ipynb
+++ b/examples/Stock_NeurIPS2018_SB3.ipynb
@@ -224,15 +224,6 @@
],
"source": [
"## install required packages\n",
- "\n",
- "!pip install swig\n",
- "!pip install wrds\n",
- "!pip install pyportfolioopt\n",
- "## install finrl library\n",
- "!pip install -q condacolab\n",
- "import condacolab\n",
- "condacolab.install()\n",
- "!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig\n",
"!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git"
]
},
diff --git a/finrl/meta/data_processors/processor_alpaca.py b/finrl/meta/data_processors/processor_alpaca.py
index 0a82100073..7734e9def0 100644
--- a/finrl/meta/data_processors/processor_alpaca.py
+++ b/finrl/meta/data_processors/processor_alpaca.py
@@ -14,11 +14,9 @@
from alpaca.data.timeframe import TimeFrame
from stockstats import StockDataFrame as Sdf
-# import alpaca_trade_api as tradeapi
-
class AlpacaProcessor:
- def __init__(self, API_KEY=None, API_SECRET=None, API_BASE_URL=None, client=None):
+ def __init__(self, API_KEY=None, API_SECRET=None, client=None):
if client is None:
try:
self.client = StockHistoricalDataClient(API_KEY, API_SECRET)
@@ -426,15 +424,19 @@ def fetch_latest_data(
# Now reset the index
barset.reset_index(inplace=True)
+            # Skip when at most one column comes back (no usable bar data for this ticker)
+ if len(barset.columns) <= 1:
+ continue
+
# Set 'timestamp' as the new index
if "level_0" in barset.columns:
barset.rename(columns={"level_0": "symbol"}, inplace=True)
- if "level_1" in bars.columns:
+ if "level_1" in barset.columns:
barset.rename(columns={"level_1": "timestamp"}, inplace=True)
barset.set_index("timestamp", inplace=True)
# Reorder and rename columns as needed
- barset = bars[
+ barset = barset[
[
"close",
"high",
diff --git a/finrl/meta/env_stock_trading/env_stock_papertrading.py b/finrl/meta/env_stock_trading/env_stock_papertrading.py
index db795b9811..b8e49a1290 100644
--- a/finrl/meta/env_stock_trading/env_stock_papertrading.py
+++ b/finrl/meta/env_stock_trading/env_stock_papertrading.py
@@ -4,11 +4,14 @@
import threading
import time
-import alpaca_trade_api as tradeapi
import gymnasium as gym
import numpy as np
import pandas as pd
import torch
+from alpaca.trading.client import TradingClient
+from alpaca.trading.enums import OrderSide
+from alpaca.trading.enums import QueryOrderStatus
+from alpaca.trading.requests import GetOrdersRequest
from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor
@@ -26,7 +29,6 @@ def __init__(
action_dim,
API_KEY,
API_SECRET,
- API_BASE_URL,
tech_indicator_list,
turbulence_thresh=30,
max_stock=1e2,
@@ -98,7 +100,9 @@ def __init__(
# connect to Alpaca trading API
try:
- self.alpaca = tradeapi.REST(API_KEY, API_SECRET, API_BASE_URL, "v2")
+ self.alpaca = TradingClient(
+ api_key=API_KEY, secret_key=API_SECRET, paper=True
+ )
except:
raise ValueError(
"Fail to connect Alpaca. Please check account info and internet connection."
@@ -153,7 +157,11 @@ def test_latency(self, test_times=10):
return latency
def run(self):
- orders = self.alpaca.list_orders(status="open")
+ # params to filter orders by
+ request_params = GetOrdersRequest(status=QueryOrderStatus.OPEN)
+
+ # orders that satisfy params
+ orders = self.alpaca.get_orders(filter=request_params)
for order in orders:
self.alpaca.cancel_order(order.id)
@@ -180,7 +188,7 @@ def run(self):
"""# Close all positions when 1 minutes til market close.
print("Market closing soon. Closing positions.")
- positions = self.alpaca.list_positions()
+ positions = self.alpaca.get_all_positions()
for position in positions:
if(position.side == 'long'):
orderSide = 'sell'
@@ -278,7 +286,7 @@ def trade(self):
self.stocks_cd[index] = 0
else: # sell all when turbulence
- positions = self.alpaca.list_positions()
+ positions = self.alpaca.get_all_positions()
for position in positions:
if position.side == "long":
orderSide = "sell"
@@ -295,7 +303,7 @@ def trade(self):
self.stocks_cd[:] = 0
def get_state(self):
- alpaca = AlpacaProcessor(api=self.alpaca)
+ alpaca = AlpacaProcessor(API_KEY=API_KEY, API_SECRET=API_SECRET)
price, tech, turbulence = alpaca.fetch_latest_data(
ticker_list=self.stockUniverse,
time_interval="1Min",
@@ -308,7 +316,7 @@ def get_state(self):
).astype(np.float32)
tech = tech * 2**-7
- positions = self.alpaca.list_positions()
+ positions = self.alpaca.get_all_positions()
stocks = [0] * len(self.stockUniverse)
for position in positions:
ind = self.stockUniverse.index(position.symbol)
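
The run() hunk above swaps list_orders(status="open") for alpaca-py's request-object style. A hedged, standalone sketch of the same order and position queries (note that alpaca-py's TradingClient cancels by id via cancel_order_by_id):

    # Sketch of the alpaca-py order/position calls used above; keys are placeholders.
    from alpaca.trading.client import TradingClient
    from alpaca.trading.enums import QueryOrderStatus
    from alpaca.trading.requests import GetOrdersRequest

    trading_client = TradingClient(api_key=API_KEY, secret_key=API_SECRET, paper=True)

    # Fetch only the open orders, then cancel each one by id.
    open_orders = trading_client.get_orders(filter=GetOrdersRequest(status=QueryOrderStatus.OPEN))
    for order in open_orders:
        trading_client.cancel_order_by_id(order.id)

    # Positions come back as objects exposing .symbol, .qty and .side.
    for position in trading_client.get_all_positions():
        print(position.symbol, position.qty, position.side)
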
diff --git a/finrl/meta/paper_trading/alpaca.py b/finrl/meta/paper_trading/alpaca.py
index e614f6e810..68ffe58b3b 100644
--- a/finrl/meta/paper_trading/alpaca.py
+++ b/finrl/meta/paper_trading/alpaca.py
@@ -6,11 +6,14 @@
import threading
import time
-import alpaca_trade_api as tradeapi
-import gym
+import gymnasium as gym
import numpy as np
import pandas as pd
import torch
+from alpaca.trading.client import TradingClient
+from alpaca.trading.enums import OrderSide
+from alpaca.trading.enums import QueryOrderStatus
+from alpaca.trading.requests import GetOrdersRequest
from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor
from finrl.meta.paper_trading.common import AgentPPO
@@ -94,7 +97,9 @@ def __init__(
# connect to Alpaca trading API
try:
- self.alpaca = tradeapi.REST(API_KEY, API_SECRET, API_BASE_URL, "v2")
+ self.alpaca = TradingClient(
+                api_key=API_KEY, secret_key=API_SECRET, paper=True
+ )
except:
raise ValueError(
"Fail to connect Alpaca. Please check account info and internet connection."
@@ -145,7 +150,11 @@ def test_latency(self, test_times=10):
return latency
def run(self):
- orders = self.alpaca.list_orders(status="open")
+ # params to filter orders by
+ request_params = GetOrdersRequest(status=QueryOrderStatus.OPEN)
+
+ # orders that satisfy params
+ orders = self.alpaca.get_orders(filter=request_params)
for order in orders:
self.alpaca.cancel_order(order.id)
@@ -168,7 +177,7 @@ def run(self):
print("Market closing soon. Closing positions.")
threads = []
- positions = self.alpaca.list_positions()
+ positions = self.alpaca.get_all_positions()
for position in positions:
if position.side == "long":
orderSide = "sell"
@@ -280,7 +289,7 @@ def trade(self):
else: # sell all when turbulence
threads = []
- positions = self.alpaca.list_positions()
+ positions = self.alpaca.get_all_positions()
for position in positions:
if position.side == "long":
orderSide = "sell"
@@ -313,7 +322,7 @@ def get_state(self):
).astype(np.float32)
tech = tech * 2**-7
- positions = self.alpaca.list_positions()
+ positions = self.alpaca.get_all_positions()
stocks = [0] * len(self.stockUniverse)
for position in positions:
ind = self.stockUniverse.index(position.symbol)
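
In the turbulence branch above, every long position is liquidated through the new client. A minimal sketch of submitting such a market sell with alpaca-py; the order parameters are illustrative, and the class may instead route orders through its own order helper:

    # Illustrative market-sell sketch with alpaca-py, not the class's own order helper.
    from alpaca.trading.client import TradingClient
    from alpaca.trading.enums import OrderSide, TimeInForce
    from alpaca.trading.requests import MarketOrderRequest

    trading_client = TradingClient(api_key=API_KEY, secret_key=API_SECRET, paper=True)

    for position in trading_client.get_all_positions():
        if position.side == "long":
            order = MarketOrderRequest(
                symbol=position.symbol,
                qty=abs(float(position.qty)),  # qty is returned as a string
                side=OrderSide.SELL,
                time_in_force=TimeInForce.DAY,
            )
            trading_client.submit_order(order_data=order)
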
diff --git a/finrl/meta/paper_trading/common.py b/finrl/meta/paper_trading/common.py
index aae4c52220..cfc247a6cb 100644
--- a/finrl/meta/paper_trading/common.py
+++ b/finrl/meta/paper_trading/common.py
@@ -798,7 +798,9 @@ def test(
# -----------------------------------------------------------------------------------------------------------------------------------------
-import alpaca_trade_api as tradeapi
+from alpaca.trading.client import TradingClient
+from alpaca.trading.models import TradeAccount, PortfolioHistory
+from alpaca.trading.requests import GetOrdersRequest, GetPortfolioHistoryRequest
import pandas_market_calendars as tc
import numpy as np
import pandas as pd
@@ -817,14 +819,30 @@ def get_trading_days(start, end):
return trading_days
-def alpaca_history(key, secret, url, start, end):
- api = tradeapi.REST(key, secret, url, "v2")
+def alpaca_history(key, secret, start, end):
+ trading_client = TradingClient(key, secret, paper=True)
+ trade_account = trading_client.get_account()
trading_days = get_trading_days(start, end)
df = pd.DataFrame()
for day in trading_days:
- df = df.append(
- api.get_portfolio_history(date_start=day, timeframe="5Min").df.iloc[:78]
+        portfolio_history_request = GetPortfolioHistoryRequest(start=day, timeframe="1D")
+        portfolio_history = trading_client.get_portfolio_history(
+            history_filter=portfolio_history_request
)
+        # Build a DataFrame from the relevant fields of the response
+        day_df = pd.DataFrame(
+            {
+                "timestamp": portfolio_history.timestamp,
+                "equity": portfolio_history.equity,
+                "profit_loss": portfolio_history.profit_loss,
+                "profit_loss_pct": portfolio_history.profit_loss_pct,
+            }
+        )
+        # Convert the epoch-second 'timestamp' column to a readable datetime
+        day_df["timestamp"] = pd.to_datetime(day_df["timestamp"], unit="s")
+        # Accumulate each day's history instead of overwriting the frame
+        df = pd.concat([df, day_df], ignore_index=True)
+
equities = df.equity.values
cumu_returns = equities / equities[0]
cumu_returns = cumu_returns[~np.isnan(cumu_returns)]
@@ -835,7 +853,7 @@ def alpaca_history(key, secret, url, start, end):
def DIA_history(start):
data_df = yf.download(["^DJI"], start=start, interval="5m")
data_df = data_df.iloc[:]
- baseline_returns = data_df["Adj Close"].values / data_df["Adj Close"].values[0]
+ baseline_returns = data_df["Close"].values / data_df["Close"].values[0]
return data_df, baseline_returns
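
The baseline switch from "Adj Close" to "Close" lines up with recent yfinance releases, where auto_adjust defaults to True and the "Adj Close" column is no longer present. A small, version-dependent sketch that pins the behaviour explicitly:

    # Assumes a recent yfinance; passing auto_adjust=True makes the intent explicit
    # so the 'Close' column is adjusted regardless of the installed default.
    import yfinance as yf

    data_df = yf.download(["^DJI"], start="2022-09-01", interval="5m", auto_adjust=True)
    closes = data_df["Close"].squeeze().values  # squeeze in case of MultiIndex columns
    baseline_returns = closes / closes[0]
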
diff --git a/pyproject.toml b/pyproject.toml
index 4b133d53fa..18fbdfda90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,6 @@ github = "https://github.com/finrl/finrl-library"
[tool.poetry.dependencies]
python = "^3.7"
elegantrl = {git="https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl"}
-alpaca-trade-api = "^3"
ccxt = "^3"
jqdatasdk = "^1"
pyfolio-reloaded = "^0.9"
diff --git a/requirements.txt b/requirements.txt
index 8220f94c46..cdd7fc4682 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
alpaca-py
-alpaca_trade_api>=2.1.0
ccxt>=1.66.32
elegantrl