diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..c4516c1e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,22 @@
+# Created by the repo
+data/
+log/
+output/
+pretrained_models/
+
+# Generated by vscode
+.vscode
+__pycache__/
+build/
+
+# Custom
+detail-api/
+*.py[co]
+/temp*/
+/temp*.*/
+
+# Unknown
+scripts/
+
+# Model parameters
+*.pth
\ No newline at end of file
diff --git a/README.md b/README.md
index 5372054c..4651eca1 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ Performance on the Cityscapes dataset. The models are trained and tested with th
 
 ## Quick start
 
 ### Install
-1. Install PyTorch=0.4.1 following the [official instructions](https://pytorch.org/)
+1. Install PyTorch=0.4.1 following the [official instructions](https://pytorch.org/) or [this guide](torch_installation.md)
 2. git clone https://github.com/HRNet/HRNet-Semantic-Segmentation $SEG_ROOT
 3. Install dependencies: pip install -r requirements.txt
diff --git a/lib/models/sync_bn/inplace_abn/__pycache__/__init__.cpython-36.pyc b/lib/models/sync_bn/inplace_abn/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index c42d09f5..00000000
Binary files a/lib/models/sync_bn/inplace_abn/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/lib/models/sync_bn/inplace_abn/__pycache__/bn.cpython-36.pyc b/lib/models/sync_bn/inplace_abn/__pycache__/bn.cpython-36.pyc
deleted file mode 100644
index 77d9df51..00000000
Binary files a/lib/models/sync_bn/inplace_abn/__pycache__/bn.cpython-36.pyc and /dev/null differ
diff --git a/lib/models/sync_bn/inplace_abn/__pycache__/functions.cpython-36.pyc b/lib/models/sync_bn/inplace_abn/__pycache__/functions.cpython-36.pyc
deleted file mode 100644
index 98a64da0..00000000
Binary files a/lib/models/sync_bn/inplace_abn/__pycache__/functions.cpython-36.pyc and /dev/null differ
diff --git a/lib/models/sync_bn/inplace_abn/functions.py b/lib/models/sync_bn/inplace_abn/functions.py
index 17d11077..b222d4d3 100644
--- a/lib/models/sync_bn/inplace_abn/functions.py
+++ b/lib/models/sync_bn/inplace_abn/functions.py
@@ -1,3 +1,4 @@
+import os
 from os import path
 
 import torch.autograd as autograd
@@ -6,14 +7,20 @@ from torch.utils.cpp_extension import load
 
 _src_path = path.join(path.dirname(path.abspath(__file__)), "src")
 
-_backend = load(name="inplace_abn",
-                extra_cflags=["-O3"],
-                sources=[path.join(_src_path, f) for f in [
-                    "inplace_abn.cpp",
-                    "inplace_abn_cpu.cpp",
-                    "inplace_abn_cuda.cu"
-                ]],
-                extra_cuda_cflags=["--expt-extended-lambda"])
+build_folder = path.join(path.split(_src_path)[0], "build")
+
+if not path.exists(build_folder):
+    os.makedirs(build_folder, 0o777)
+
+_backend = load(
+    name="inplace_abn",
+    extra_cflags=["-O3"],
+    sources=[
+        path.join(_src_path, f) for f in
+        ["inplace_abn.cpp", "inplace_abn_cpu.cpp", "inplace_abn_cuda.cu"]
+    ],
+    extra_cuda_cflags=["--expt-extended-lambda"],
+    build_directory=build_folder)
 
 # Activation names
 ACT_RELU = "relu"
@@ -73,9 +80,19 @@ def _act_backward(ctx, x, dx):
 
 
 class InPlaceABN(autograd.Function):
+
     @staticmethod
-    def forward(ctx, x, weight, bias, running_mean, running_var,
-                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
+    def forward(ctx,
+                x,
+                weight,
+                bias,
+                running_mean,
+                running_var,
+                training=True,
+                momentum=0.1,
+                eps=1e-05,
+                activation=ACT_LEAKY_RELU,
+                slope=0.01):
         # Save context
         ctx.training = training
         ctx.momentum = momentum
@@ -95,7 +112,8 @@ def forward(ctx, x, weight, bias, running_mean, running_var,
 
             # Update running stats
             running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
-            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
+            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var *
+                                                      count / (count - 1))
 
             # Mark in-place modified tensors
             ctx.mark_dirty(x, running_mean, running_var)
@@ -122,13 +140,15 @@ def backward(ctx, dz):
         _act_backward(ctx, z, dz)
 
         if ctx.training:
-            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
+            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine,
+                                          ctx.eps)
         else:
             # TODO: implement simplified CUDA backward for inference mode
             edz = dz.new_zeros(dz.size(1))
             eydz = dz.new_zeros(dz.size(1))
 
-        dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
+        dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz,
+                                               eydz, ctx.affine, ctx.eps)
         dweight = dweight if ctx.affine else None
         dbias = dbias if ctx.affine else None
 
@@ -136,9 +156,21 @@ def backward(ctx, dz):
 
 
 class InPlaceABNSync(autograd.Function):
+
     @classmethod
-    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
-                extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
+    def forward(cls,
+                ctx,
+                x,
+                weight,
+                bias,
+                running_mean,
+                running_var,
+                extra,
+                training=True,
+                momentum=0.1,
+                eps=1e-05,
+                activation=ACT_LEAKY_RELU,
+                slope=0.01):
         # Save context
         cls._parse_extra(ctx, extra)
         ctx.training = training
@@ -169,9 +201,10 @@ def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                 vars = comm.gather(vars)
 
                 mean = means.mean(0)
-                var = (vars + (mean - means) ** 2).mean(0)
+                var = (vars + (mean - means)**2).mean(0)
 
-                tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
+                tensors = comm.broadcast_coalesced(
+                    (mean, var), [mean.get_device()] + ctx.worker_ids)
                 for ts, queue in zip(tensors[1:], ctx.worker_queues):
                     queue.put(ts)
             else:
@@ -181,7 +214,8 @@ def forward(cls, ctx, x, weight, bias, running_mean, running_var,
 
             # Update running stats
             running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
-            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
+            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var *
+                                                      count / (count - 1))
 
             # Mark in-place modified tensors
             ctx.mark_dirty(x, running_mean, running_var)
@@ -208,7 +242,8 @@ def backward(ctx, dz):
         _act_backward(ctx, z, dz)
 
         if ctx.training:
-            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
+            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine,
+                                          ctx.eps)
 
             if ctx.is_master:
                 edzs, eydzs = [edz], [eydz]
@@ -221,7 +256,8 @@
                 edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1)
                 eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1)
 
-                tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids)
+                tensors = comm.broadcast_coalesced(
+                    (edz, eydz), [edz.get_device()] + ctx.worker_ids)
                 for ts, queue in zip(tensors[1:], ctx.worker_queues):
                     queue.put(ts)
             else:
@@ -232,7 +268,8 @@
             edz = dz.new_zeros(dz.size(1))
             eydz = dz.new_zeros(dz.size(1))
 
-        dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
+        dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz,
+                                               eydz, ctx.affine, ctx.eps)
         dweight = dweight if ctx.affine else None
         dbias = dbias if ctx.affine else None
 
@@ -253,4 +290,7 @@ def _parse_extra(ctx, extra):
 
 inplace_abn = InPlaceABN.apply
 inplace_abn_sync = InPlaceABNSync.apply
-__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
+__all__ = [
+    "inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU",
+    "ACT_NONE"
+]
diff --git a/torch_installation.md b/torch_installation.md
new file mode 100644
index 00000000..fbeff2d4
--- /dev/null
+++ b/torch_installation.md
@@ -0,0 +1,21 @@
+# How to install PyTorch==0.4.1 with CUDA support
+
+First, create a new conda environment. Note that it should use Python 3.6, since the PyTorch 0.4.1 wheel below is built for cp36.
+
+```
+conda create -n hrnet python=3.6
+```
+
+Then install cudatoolkit 9.0 manually:
+
+```
+conda install cudatoolkit=9.0 -c https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/linux-64/
+```
+
+Then install PyTorch 0.4.1:
+
+```
+pip install http://download.pytorch.org/whl/cu90/torch-0.4.1-cp36-cp36m-linux_x86_64.whl
+```
+
+Finally, install the other required modules, such as Cython and opencv-python.
\ No newline at end of file
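
Note for reviewers: the `functions.py` change above pins the JIT-compiled extension to a fixed `build` folder via the `build_directory` argument of `torch.utils.cpp_extension.load`, so repeated runs reuse the cached binary instead of recompiling into a temporary directory (and the new `build/` entry in `.gitignore` keeps those artifacts out of version control). A minimal sketch of the same pattern, with a hypothetical single-source extension named `example_ext`, looks like this:

```
import os
from os import path

from torch.utils.cpp_extension import load

# Hypothetical layout mirroring the patch: C++ sources live in ./src,
# compiled artifacts go to a sibling ./build folder.
_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_build_folder = path.join(path.split(_src_path)[0], "build")

# The patch guards os.makedirs(build_folder, 0o777) with a path.exists()
# check; exist_ok=True is an equivalent race-free spelling on Python 3.
os.makedirs(_build_folder, exist_ok=True)

# load() compiles the sources with the C++ extension toolchain and imports
# the resulting module; build_directory controls where the object files and
# the shared library land.
_backend = load(
    name="example_ext",
    sources=[path.join(_src_path, "example_ext.cpp")],
    extra_cflags=["-O3"],
    build_directory=_build_folder)
```

Without `build_directory`, `load` falls back to a per-user temporary extensions directory, which may be cleaned between runs.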