diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000..15bfb67 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,17 @@ +Ray Live Demo +============= + +This demo can be run live while presenting to show Ray's capabilities. + +## Instructions +1. Launch a Ray cluster on AWS with `ray up cluster_config.yaml` + - To run locally, install the requirements with `pip install -r requirements.txt` +2. Connect to the head node + - I recommend using SSH with port forwarding in order to use Jupyter, Ray Dashboard, and Tensorboard without compromising security + - For example, `ssh -L 9999:127.0.0.1:8889 -L 9998:127.0.0.1:8080 -L 9997:127.0.0.1:6006 ubuntu@12.123.123.123` should map Jupyter to `127.0.0.1:9999`, Ray Dashboard to `127.0.0.1:9998` and Tensorboard to `127.0.0.1:9997` +2. Open the jupyter notebooks on the cluster and set the `CLUSTER_ADDRESS` parameter in `ray_api_demo.ipynb` and `rllib_demo.ipynb` +3. Also set links for Ray Dashboard and Tensorboard +4. Run the live-coding presentation with [rise](https://rise.readthedocs.io/) + - Start a presentation with `Alt-r` or by pressing the button in the top right of the toolbar + - Use `SpaceBar` to navigate to the next slide and `Shift-SpaceBar` to navigate to the previous slide + - Use `Shift-Enter` to run the code in a cell diff --git a/demo/cluster_config.yaml b/demo/cluster_config.yaml new file mode 100644 index 0000000..6d85f5b --- /dev/null +++ b/demo/cluster_config.yaml @@ -0,0 +1,114 @@ +# An unique identifier for the head node and workers of this cluster. +cluster_name: ray-demo + +# The minimum number of workers nodes to launch in addition to the head +# node. This number should be >= 0. +min_workers: 2 + +# The maximum number of workers nodes to launch in addition to the head +# node. This takes precedence over min_workers. +max_workers: 5 + +# The initial number of worker nodes to launch in addition to the head +# node. 
When the cluster is first brought up (or when it is refreshed with a +# subsequent `ray up`) this number of nodes will be started. +initial_workers: 2 + +# Whether or not to autoscale aggressively. If this is enabled, if at any point +# we would start more workers, we start at least enough to bring us to +# initial_workers. +autoscaling_mode: default + +# The autoscaler will scale up the cluster to this target fraction of resource +# usage. For example, if a cluster of 10 nodes is 100% busy and +# target_utilization is 0.8, it would resize the cluster to 13. This fraction +# can be decreased to increase the aggressiveness of upscaling. +# This value must be less than 1.0 for scaling to happen. +target_utilization_fraction: 0.8 + +# If a node is idle for this many minutes, it will be removed. +idle_timeout_minutes: 5 + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-west-2 + # Availability zone(s), comma-separated, that nodes may be launched in. + # Nodes are currently spread between zones by a round-robin approach, + # however this implementation detail should not be relied upon. + availability_zone: us-west-2a,us-west-2b + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu +# By default Ray creates a new private keypair, but you can also use your own. +# If you do so, make sure to also set "KeyName" in the head and worker node +# configurations below. +# ssh_private_key: /path/to/your/key.pem + +# Provider-specific config for the head node, e.g. instance type. By default +# Ray will auto-configure unspecified fields such as SubnetId and KeyName. 
+# For more documentation on available fields, see: +# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances +head_node: + InstanceType: m5.large + ImageId: ami-02c253ecf7eaba73e # Deep Learning AMI (Ubuntu 16.04) Version 24.2 + + # You can provision additional disk space with a conf as follows + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: 100 + + # Additional options in the boto docs. + +# Provider-specific config for worker nodes, e.g. instance type. By default +# Ray will auto-configure unspecified fields such as SubnetId and KeyName. +# For more documentation on available fields, see: +# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances +worker_nodes: + InstanceType: m5.large + ImageId: ami-0b294f219d14e6a82 # Deep Learning AMI (Ubuntu) Version 21.0 + + # Run workers on spot by default. Comment this out to use on-demand. + InstanceMarketOptions: + MarketType: spot + # Additional options can be found in the boto docs, e.g. + # SpotOptions: + # MaxPrice: MAX_HOURLY_PRICE + + # Additional options in the boto docs. + +# Files or directories to copy to the head and worker nodes. The format is a +# dictionary from REMOTE_PATH: LOCAL_PATH, e.g. +file_mounts: { + "/home/ubuntu/ray_api_demo.ipynb": "./ray_api_demo.ipynb", + "/home/ubuntu/rllib_demo.ipynb": "./rllib_demo.ipynb", + "/home/ubuntu/utils.py": "./utils.py", +} + +# List of shell commands to run to set up nodes. +setup_commands: + - echo 'export PATH="$HOME/anaconda3/envs/tensorflow_p36/bin:$PATH"' >> ~/.bashrc + - pip install -U ray ray[rllib] ray[tune] ray[debug] + - pip install aiohttp psutil gym opencv-python + - pip install lz4 + +# Custom commands that will be run on the head node after common setup. 
+head_setup_commands: + - pip install RISE + - pip install boto3==1.4.8 # 1.4.8 adds InstanceMarketOptions + - pip install -U tensorflow + +# Custom commands that will be run on worker nodes after common setup. +worker_setup_commands: [] + +# Command to start ray on the head node. You don't need to change this. +head_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --include-webui + +# Command to start ray on worker nodes. You don't need to change this. +worker_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --redis-address=$RAY_HEAD_IP:6379 --object-manager-port=8076 diff --git a/demo/ray_api_demo.ipynb b/demo/ray_api_demo.ipynb new file mode 100644 index 0000000..2d4430d --- /dev/null +++ b/demo/ray_api_demo.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Ray API Demo: Parallel Training\n", + "\n", + "Demo created by [Peter Schafhalter](https://github.com/pschafhalter/)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Training function\n", + "\n", + "Trains a classifier with a hyperparameter.\n", + "\n", + "Technical details:\n", + "- The classifier is a multi-layer perceptron.\n", + "- The hyperparameter (alpha) is the regularization parameter.\n", + "- The value of alpha affects the [bias-variance tradeoff](https://en.wikipedia.org/wiki/Bias%E2%80%93variance_tradeoff), which impacts whether the model underfits or overfits."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Import the scikit-learn machine learning library\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.neural_network import MLPClassifier\n", + "\n", + "def train(alpha, train_x, test_x, train_y, test_y):\n", + " # Instantiate a model with the given value of alpha\n", + " classifier = MLPClassifier(alpha=alpha, max_iter=10000)\n", + " # Train the model on the training data\n", + " classifier.fit(train_x, train_y)\n", + " # Evaluate the model on the test data and return the model's accuracy score\n", + " predicted_y = classifier.predict(test_x)\n", + " return accuracy_score(test_y, predicted_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Training with Hyperparameters\n", + "\n", + "- Try different values for alpha to train best model.\n", + "- Without parallelizing this is slow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Imports for profiling and visualization\n", + "from IPython.core.display import HTML\n", + "import time\n", + "import tqdm\n", + "\n", + "from utils import ray_get_with_progress_bar\n", + "\n", + "# Imports for generating the dataset\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.datasets import make_moons\n", + "\n", + "# Generate the dataset\n", + "x, y = make_moons(noise=0.8, random_state=0)\n", + "\n", + "# Set hyperparameters\n", + "trials = [10**-x for x in range(16)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Train with different values for alpha\n", + "results = []\n", + "start = time.time()\n", + "for alpha in tqdm.tqdm(trials):\n", + " results.append(train(alpha, *train_test_split(x, y)))\n", + "serial_time = time.time() - start\n", + "\n", + "# Print results\n", + "HTML(f\"

Best accuracy: {max(results):.2f}

Total time: {serial_time:.2f} seconds

\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Parallel Training with Ray\n", + "\n", + "1. Import and set up ray with `ray.init()`\n", + "2. Add the `ray.remote` decorator to `train`\n", + "3. Replace calls to `train(...)` with `train.remote(...)`\n", + "4. Get the resulting python objects with `results = ray.get(results)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "import ray\n", + "ray.shutdown()\n", + "ray.init()\n", + "\n", + "HTML(f\"\"\"

Started Ray locally with:

\n", + "

{ray.cluster_resources()[\"CPU\"] : .0f} CPUs

\n", + "

{ray.cluster_resources()[\"memory\"]} GB of memory available

\n", + "

{ray.cluster_resources()[\"object_store_memory\"]} GB of object store memory

\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "@ray.remote\n", + "def train(alpha, train_x, test_x, train_y, test_y):\n", + " classifier = MLPClassifier(alpha=alpha, max_iter=10000)\n", + " classifier.fit(train_x, train_y)\n", + " predicted_y = classifier.predict(test_x)\n", + " return accuracy_score(test_y, predicted_y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Train with different values for alpha\n", + "results = []\n", + "start = time.time()\n", + "for alpha in trials:\n", + " # Call train with train.remote(...)\n", + " results.append(train.remote(alpha, *train_test_split(x, y)))\n", + "\n", + "# Get results\n", + "# results = ray.get(results) # This works just like the result below, but without progress bar\n", + "results = ray_get_with_progress_bar(results)\n", + "\n", + "parallel_time = time.time() - start\n", + "\n", + "# Print results\n", + "HTML(f\"\"\"

Best accuracy: {max(results):.2f}

\n", + "

Total time: {parallel_time:.2f} seconds ({serial_time / parallel_time : .2f}x faster)

\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Parallel Training with Ray on a Cluster\n", + "\n", + "1. Launch a Ray cluster on AWS with `ray up cluster_config.yaml`\n", + "2. SSH into head node\n", + "3. Replace `ray.init()` with `ray.init(redis_address=\"...\")`\n", + "\n", + "The Ray Autoscaler can add and remove nodes as the workload changes. Currently, it integrates with AWS, GCP, Kubernetes, and private clusters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Connect to cluster\n", + "CLUSTER_ADDRESS = None # Set this to run on cluster\n", + "ray.shutdown()\n", + "ray.init(redis_address=CLUSTER_ADDRESS, include_webui=True)\n", + "\n", + "url = ray.get_webui_url() # Override this in case of SSH forwarding from the cluster\n", + "HTML(f\"\"\"

Connected to Ray cluster with:

\n", + "

{ray.cluster_resources()[\"CPU\"] : .0f} CPUs

\n", + "

{ray.cluster_resources()[\"memory\"]} GB of memory available

\n", + "

{ray.cluster_resources()[\"object_store_memory\"]} GB of object store memory

\n", + "
\n", + "Dashboard\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Train with different values for alpha\n", + "results = []\n", + "start = time.time()\n", + "for alpha in trials:\n", + " # Call train with train.remote(...)\n", + " results.append(train.remote(alpha, *train_test_split(x, y)))\n", + "\n", + "# Get results\n", + "# results = ray.get(results) # This works just like the result below, but without progress bar\n", + "results = ray_get_with_progress_bar(results)\n", + "\n", + "cluster_time = time.time() - start\n", + "\n", + "# Print results\n", + "HTML(f\"\"\"

Best accuracy: {max(results):.2f}

\n", + "

Total time: {cluster_time:.2f} seconds ({serial_time / cluster_time : .2f}x faster)

\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Train with different values for alpha\n", + "cluster_trials = [i for i in range(100)]\n", + "results = []\n", + "start = time.time()\n", + "for alpha in cluster_trials:\n", + " # Call train with train.remote(...)\n", + " results.append(train.remote(alpha, *train_test_split(x, y)))\n", + "\n", + "# Get results\n", + "# results = ray.get(results) # This works just like the result below, but without progress bar\n", + "results = ray_get_with_progress_bar(results)\n", + "\n", + "cluster_large_workload_time = time.time() - start\n", + "\n", + "# Print results\n", + "HTML(f\"\"\"

Best accuracy: {max(results):.2f}

\n", + "

Total time: {cluster_large_workload_time:.2f} seconds ({cluster_large_workload_time / serial_time : .2f}x slower)

\n", + "

Total trials: {len(cluster_trials)}\\t({round(len(cluster_trials) / len(trials))}x more trials)\"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "HTML(f\"\"\"

Time without Ray:\\t{serial_time:.2f} seconds

\n", + "

Time with Ray:\\t{parallel_time:.2f} seconds ({serial_time / parallel_time :.2f}x faster)

\n", + "

Time with Ray on Cluster:\\t{cluster_time:.2f} seconds ({serial_time / cluster_time :.2f}x faster)

\n", + "

Ran {round(len(cluster_trials) / len(trials))}x more trials on cluster in {cluster_large_workload_time:.2f} seconds ({cluster_large_workload_time / serial_time : .2f}x slower)\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Note: speedup doesn't exactly scale with cores due to overhead.\n", + "\n", + "Speedup tends to become linear as the number of tasks increases, or tasks become longer." + ] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/demo/requirements.txt b/demo/requirements.txt new file mode 100644 index 0000000..b496529 --- /dev/null +++ b/demo/requirements.txt @@ -0,0 +1,102 @@ +absl-py==0.8.1 +aiohttp==3.6.2 +astor==0.8.0 +async-timeout==3.0.1 +atomicwrites==1.3.0 +attrs==19.2.0 +backcall==0.1.0 +bleach==3.1.0 +boto3==1.9.248 +botocore==1.12.248 +certifi==2019.9.11 +chardet==3.0.4 +Click==7.0 +cloudpickle==1.2.2 +colorama==0.4.1 +decorator==4.4.0 +defusedxml==0.6.0 +docutils==0.15.2 +entrypoints==0.3 +filelock==3.0.12 +funcsigs==1.0.2 +future==0.18.0 +gast==0.3.2 +google-pasta==0.1.7 +grpcio==1.24.1 +gym==0.15.3 +h5py==2.10.0 +idna==2.8 +idna-ssl==1.1.0 +importlib-metadata==0.23 +ipykernel==5.1.2 +ipython==7.8.0 +ipython-genutils==0.2.0 +jedi==0.15.1 +Jinja2==2.10.3 +jmespath==0.9.4 +joblib==0.14.0 +jsonschema==3.1.1 +jupyter-client==5.3.4 +jupyter-core==4.6.0 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.0 +lz4==2.2.1 +Markdown==3.1.1 +MarkupSafe==1.1.1 +mistune==0.8.4 +more-itertools==7.2.0 +multidict==4.5.2 +nbconvert==5.6.0 +nbformat==4.4.0 
+notebook==6.0.1 +numpy==1.17.2 +opencv-python==4.1.1.26 +packaging==19.2 +pandocfilters==1.4.2 +parso==0.5.1 +pexpect==4.7.0 +pickleshare==0.7.5 +pluggy==0.13.0 +prometheus-client==0.7.1 +prompt-toolkit==2.0.10 +protobuf==3.10.0 +psutil==5.6.3 +ptyprocess==0.6.0 +py==1.8.0 +py-spy==0.2.2 +pyglet==1.3.2 +Pygments==2.4.2 +pyparsing==2.4.2 +pyrsistent==0.15.4 +pytest==5.2.1 +python-dateutil==2.8.0 +PyYAML==5.1.2 +pyzmq==18.1.0 +ray==0.7.5 +redis==3.3.10 +requests==2.22.0 +rise==5.5.1 +s3transfer==0.2.1 +scikit-learn==0.21.3 +scipy==1.3.1 +Send2Trash==1.5.0 +setproctitle==1.1.10 +six==1.12.0 +sklearn==0.0 +tensorboard==1.14.0 +tensorflow==1.14.0 +tensorflow-estimator==1.14.0 +termcolor==1.1.0 +terminado==0.8.2 +testpath==0.4.2 +tornado==6.0.3 +tqdm==4.36.1 +traitlets==4.3.3 +typing-extensions==3.7.4 +urllib3==1.25.6 +wcwidth==0.1.7 +webencodings==0.5.1 +Werkzeug==0.16.0 +wrapt==1.11.2 +yarl==1.3.0 +zipp==0.6.0 diff --git a/demo/rllib_demo.ipynb b/demo/rllib_demo.ipynb new file mode 100644 index 0000000..8ae4c4a --- /dev/null +++ b/demo/rllib_demo.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# RLlib Demo: Solving Mountain Car\n", + "\n", + "\n", + "\n", + "Demo created by [Peter Schafhalter](https://github.com/pschafhalter/)\n", + "\n", + "Image source: [skyai.org](http://skyai.org/wiki/?plugin=ref&page=Documentation%2FTutorial%20-%20Example%20-%20Mountain%20Car&src=mountaincar.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "import ray\n", + "from ray import tune\n", + "from utils import MOUNTAINCAR_DEFAULT_CONFIG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { 
+ "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "CLUSTER_ADDRESS = None # Set this value to run on cluster\n", + "ray.init(redis_address=CLUSTER_ADDRESS, include_webui=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Configuring RLlib" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "```json\n", + "config = {\n", + " \"env\": \"MountainCarContinuous-v0\",\n", + " \"actor_hiddens\": [32, 64],\n", + " \"critic_hiddens\": [64, 64],\n", + " \"actor_lr\": 1e-3,\n", + " \"critic_lr\": 1e-3,\n", + " \"l2_reg\": 1e-5,\n", + "}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "config = MOUNTAINCAR_DEFAULT_CONFIG.copy()\n", + "config" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Running an Experiment\n", + "\n", + "\n", + "[Tensorboard](http://127.0.0.1:6006)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Modify the l2 regularization hyperparameter\n", + "config[\"l2_reg\"] = 1e-1\n", + "\n", + "# Run the RL experiment using the DDPG algorithm\n", + "tune.run(\"DDPG\", stop={\n", + " \"time_total_s\": 60 # Stop after 60 seconds\n", + " },\n", + " config=config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Searching for Hyperparameters\n", + "\n", + "1. 
Replace hyperparameter's value with `tune.grid_search([1, 2, 3, 4, 5])`\n", + " - `l2_reg=1e-1` becomes `l2_reg=tune.grid_search([1e-1, 1e-3, 1e-5, 1e-7])`\n", + " - Can also sample from a continuous PDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "# Run a grid search in parallel for the l2 regularization hyperparameter\n", + "config[\"l2_reg\"] = tune.grid_search([1e-1, 1e-3, 1e-5, 1e-7])\n", + "\n", + "tune.run(\"DDPG\", stop={\n", + " \"episode_reward_mean\": 90, # Stop if the algorithm achieves a mean reward of 90\n", + " \"time_total_s\": 600 # Stop after 600 seconds\n", + " },\n", + " config=config)" + ] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/demo/utils.py b/demo/utils.py new file mode 100644 index 0000000..2339d41 --- /dev/null +++ b/demo/utils.py @@ -0,0 +1,54 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tqdm +import ray + +MOUNTAINCAR_DEFAULT_CONFIG = { + "env": "MountainCarContinuous-v0", + "actor_hiddens": [32, 64], + "critic_hiddens": [64, 64], + "n_step": 3, + "model": {}, + "gamma": 0.99, + "env_config": {}, + "exploration_should_anneal": True, + "schedule_max_timesteps": 100000, + "timesteps_per_iteration": 1000, + "exploration_fraction": 0.4, + "exploration_final_scale": 0.02, + "exploration_ou_noise_scale": 0.75, + "exploration_ou_theta": 0.15, + "exploration_ou_sigma": 0.2, + "target_network_update_freq": 0, + "tau": 0.01, + 
"buffer_size": 50000, + "prioritized_replay": False, + "prioritized_replay_alpha": 0.6, + "prioritized_replay_beta": 0.4, + "prioritized_replay_eps": 1.0e-06, + "clip_rewards": False, + "actor_lr": 0.001, + "critic_lr": 0.001, + "use_huber": False, + "huber_threshold": 1.0, + "l2_reg": 1.0e-05, + "learning_starts": 1000, + "sample_batch_size": 1, + "train_batch_size": 64, + "num_workers": 0, + "num_gpus_per_worker": 0, + "per_worker_exploration": False, + "worker_side_prioritization": False, + "evaluation_interval": 5, + "evaluation_num_episodes": 10 +} + + +def ray_get_with_progress_bar(object_ids): + ready = [] + remaining = object_ids + for _ in tqdm.tqdm(range(len(object_ids))): + ready, remaining = ray.wait(remaining) + return ray.get(ready)