ready to pypi

marcocspc · marcocspc · commit 70dbae8df219 · 2020-05-09T22:38:58.000-03:00
diff --git a/README.md b/README.md
@@ -24,6 +24,8 @@ pip3 install git+https://github.com/pvnetto/URNAI-Tools/
 
 The basic installation will install all the *basic* required dependencies, including OpenAI Gym and SC2LE. But for other supported environments, you will need to install them for yourself. We describe how to do this on the next section. 
 
+To use tensorflow-cpu instead of tensorflow-gpu, see the Optional section below.
+
 ### Optional
 
 #### Starcraft II
@@ -48,6 +50,10 @@ set "URNAI_2048=1" && pip3 install urnai
 
 #### VizDoom
 
+Before installing urnai with vizdoom support, please make sure you have all of ViZDoom's dependencies installed.
+
+Go [here](https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#deps) first.
+
 To install urnai with vizdoom support, use:
 
 - On Unix:
@@ -87,17 +93,39 @@ URNAI_DEEPRTS=1 URNAI_VIZDOOM=1 URNAI_2048=1 pip3 install urnai
 ```
 set "URNAI_DEEPRTS=1" && set "URNAI_VIZDOOM=1" && set "URNAI_2048=1" && pip3 install urnai 
 ```
+#### Tensorflow CPU
+
+By default, urnai depends on tensorflow-gpu. To use tensorflow-cpu instead, use:
+
+- On Unix:
+```
+URNAI_TF_CPU=1 pip3 install urnai 
+```
 
+- On Windows:
+```
+set "URNAI_TF_CPU=1" && pip3 install urnai 
+```
 ### Running the examples
 
-To execute any of the examples we've included, just navigate to the project's folder and run them using Python.
+From version 0.0.2 onward, you can run the examples using JSON files:
+
+```
+git clone https://github.com/marcocspc/URNAI-Tools 
+cd 'URNAI-Tools/urnai/test/solves'
+urnai train --json-file=solve_x.json
+```
+
+## Command line
+
+You can now use urnai from the command line. Commands:
 
+To see what you can do, use:
 ```
-cd 'project/save/path'
-python solve_x.py
+urnai -h
 ```
 
-## Guide
+## Building your own code
 
 Follow these instructions to start developing new stuff using our library.
 
@@ -139,7 +167,6 @@ Here you'll find all the things that we plan to do in this project. **Bold** ite
   * [X] Frozenlake
   * [X] Cartpole-V0
   * [X] Cartpole-V1
-  * [X] Taxi-V2
   * [ ] Flappy Bird
   * [ ] **StarCraft II - Simple 64 Map - Very Easy difficulty**
 
diff --git a/urnai/agents/rewards/vizdoom.py b/urnai/agents/rewards/vizdoom.py
@@ -30,18 +30,27 @@ def __init__(self):
     POSITION_Y = 15
     POSITION_Z = 16 
     GENERAL_REWARD = 17
+    
+    METHOD_CUMULATIVE = "cumulative"
+    METHOD_DIFFERENCE = "difference"
+    METHOD_POSITIVE_ONLY = "positive_only"
+
+    def __init__(self, method):
+        self.method = method 
 
     def get_reward(self, obs, reward, done):
         r = 0
         
-        #r += -10 * obs.game_variables[VizDoomHealthGatheringReward.DEAD]
-        #r += 15 * obs.game_variables[VizDoomHealthGatheringReward.ITEMCOUNT] 
-        r += obs.game_variables[VizDoomHealthGatheringReward.HEALTH] - self.prev_health 
-        #r += -10 * reward
-
-        if r >0: r *= 5
-
-        self.prev_health = obs.game_variables[VizDoomHealthGatheringReward.HEALTH]
+        if method == VizDoomHealthGatheringReward.METHOD_CUMULATIVE:
+            r += obs.game_variables[VizDoomHealthGatheringReward.HEALTH]
+        elif method == VizDoomHealthGatheringReward.METHOD_DIFFERENCE:
+            r += obs.game_variables[VizDoomHealthGatheringReward.HEALTH] - self.prev_health 
+            self.prev_health = obs.game_variables[VizDoomHealthGatheringReward.HEALTH]
+        elif method == VizDoomHealthGatheringReward.METHOD_POSITIVE_ONLY:
+            r += obs.game_variables[VizDoomHealthGatheringReward.HEALTH] - self.prev_health 
+            self.prev_health = obs.game_variables[VizDoomHealthGatheringReward.HEALTH]
+
+            if r < 0: r = 0
 
         return r
 
diff --git a/urnai/test/solves/solve_vizdoom.json b/urnai/test/solves/solve_vizdoom.json
@@ -58,101 +58,13 @@
         },
         "reward" : {
             "class" : "VizDoomHealthGatheringReward",
-            "params" : {} 
-        },
-        "agent" : {
-            "class" : "GenericAgent",
-            "params" : {} 
-        },
-        "trainer" : {
-            "class" : "Trainer",
             "params" : {
-                "file_name" : "vizdoom_jsontrainer_test",
-                "save_every" : "100",
-                "enable_save" : true
-            }
-        },
-        "json_trainer" : {
-            "train": {
-                "num_episodes" : 3000,
-                "reward_from_env" : true,
-                "max_steps" : 500
-            },
-            "play" : {
-                "num_matches" : 100
-            }
-        }
-    },
-    {
-        "env" : {
-            "class" : "VizdoomEnv",
-            "params" : {
-                "wad" : "/Users/marcocspc/git/URNAI-Tools/urnai/utils/vizdoomwads/health_gathering.wad",
-                "render" : true,
-                "doommap" : null
-            }
-        },
-        "action-wrapper" : {
-            "class" : "VizdoomHealthGatheringWrapper",
-            "params" : {} 
-        },
-        "state_builder" : {
-            "class" : "VizDoomHealthGatheringState",
-            "params" : {
-                "screen_width" : 160,
-                "screen_height" : 120
-            }
-        },
-        "model" : {
-            "class" : "DQNKerasMem",
-            "params" : {
-                "learning_rate" : 0.005,
-                "gamma" : 0.9,
-                "use_memory" : false,
-                "per_episode_epsilon_decay" : true,
-                "build_model" : [
-                    {
-                        "type": "conv", 
-                        "filters": 32, 
-                        "filter_shape": [3, 3], 
-                        "padding": "same", 
-                        "name": "default0", 
-                        "input_shape": [120, 160, 1], 
-                        "max_pooling_pool_size_shape": [2, 2]
-                    }, 
-                    {
-                        "type": "conv", 
-                        "filters": 32, 
-                        "filter_shape": [3, 3], 
-                        "padding": "same", 
-                        "name": "default1", 
-                        "input_shape": null, 
-                        "max_pooling_pool_size_shape": [2, 2]
-                    }, 
-                    {
-                        "type": "fullyconn", 
-                        "nodes": 256, 
-                        "name": "default2"
-                    }, 
-                    {
-                        "type": "fullyconn", 
-                        "nodes": 50, 
-                        "name": "default3"
-                    }, 
-                    {
-                        "type": "output", 
-                        "length": 3
-                    }
-                ]
-             }
-        },
-        "reward" : {
-            "class" : "VizDoomHealthGatheringReward",
-            "params" : {}
+                "method" : "cumulative"
+            } 
         },
         "agent" : {
             "class" : "GenericAgent",
-            "params" : {}
+            "params" : {} 
         },
         "trainer" : {
             "class" : "Trainer",