diff --git a/Procfile b/Procfile
new file mode 100644
index 00000000..fc272ab9
--- /dev/null
+++ b/Procfile
@@ -0,0 +1 @@
+web: gunicorn wsgi:app
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 00000000..91986221
--- /dev/null
+++ b/app.py
@@ -0,0 +1,44 @@
+from time import sleep
+from flask import Flask, render_template, request, send_file
+from werkzeug.utils import secure_filename
+from werkzeug.datastructures import FileStorage
+import cv2
+import os
+import glob
+import inference_flask as util
+app = Flask(__name__)
+
+model, transform, device = util.load_model()
+
+@app.route('/')
+def r_upload_file():
+    return render_template('upload.html')
+
+@app.route('/image', methods = ['GET', 'POST'])
+def image():
+    global model, transform, device
+    for file in glob.glob('./*'):
+        if file.endswith('.jpg') or file.endswith('.png') or file.endswith('jpeg'):
+            os.remove(file)
+    if request.method == 'POST':
+        f = request.files['file']
+        f.save(secure_filename(f.filename))
+        # inference
+        util.image_inference(model, transform, device, secure_filename(f.filename))
+        return send_file(secure_filename(f.filename))
+
+@app.route('/video', methods = ['GET', 'POST'])
+def video():
+    global model, transform, device
+    for file in glob.glob('./*'):
+        if file.endswith('.mp4') or file.endswith('.avi'):
+            os.remove(file)
+    if request.method == 'POST':
+        f = request.files['file']
+        f.save(secure_filename(f.filename))
+        # inference
+        util.video_inference(model, transform, device, secure_filename(f.filename))
+        return send_file(secure_filename(f.filename)+'.avi')
+
+if __name__ == '__main__':
+    app.run(debug = False)
\ No newline at end of file
diff --git a/crowd_datasets/__pycache__/__init__.cpython-39.pyc b/crowd_datasets/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 00000000..2d372275
Binary files /dev/null and b/crowd_datasets/__pycache__/__init__.cpython-39.pyc differ
diff --git a/inference_flask.py b/inference_flask.py
new file mode 100644
index 00000000..45063371
--- /dev/null
+++ b/inference_flask.py
@@ -0,0 +1,168 @@
+import argparse
+import datetime
+import random
+import time
+from pathlib import Path
+from tqdm import tqdm
+
+import torch
+import torchvision.transforms as standard_transforms
+import numpy as np
+
+from PIL import Image
+import cv2
+from crowd_datasets import build_dataset
+from engine import *
+from models import build_model
+import os
+import warnings
+warnings.filterwarnings('ignore')
+
+def get_args_parser():
+    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)
+
+    # * Backbone
+    parser.add_argument('--backbone', default='vgg16_bn', type=str,
+                        help="name of the convolutional backbone to use")
+
+    parser.add_argument('--input_video', default='../Video-tests/test1.mp4', type=str,
+                        help="address of input video file")
+
+    parser.add_argument('--row', default=2, type=int,
+                        help="row number of anchor points")
+    parser.add_argument('--line', default=2, type=int,
+                        help="line number of anchor points")
+
+    parser.add_argument('--output_dir', default='./logs/',
+                        help='path where to save')
+    parser.add_argument('--weight_path', default='./weights/SHTechA.pth',
+                        help='path where the trained weights are saved')
+
+    parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for evaluation')
+
+    return parser
+
+def load_model():
+    parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
+    args = parser.parse_args()
+    # os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
+
+    # print(args)
+    device = torch.device('cpu')
+    # get the P2PNet
+    model = build_model(args)
+    # move the model to the selected device
+    model.to(device)
+    # load trained model
+    if args.weight_path is not None:
+        checkpoint = torch.load(args.weight_path, map_location='cpu')
+        model.load_state_dict(checkpoint['model'])
+    # convert to eval mode
+    model.eval()
+    # create the pre-processing transform
+    transform = standard_transforms.Compose([
+        standard_transforms.ToTensor(),
+        standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return model, transform, device
+
+def image_inference(model, transform, device, img_file):
+    # set your image path here
+    img_path = img_file
+    # load the image
+    img_raw = Image.open(img_path).convert('RGB')
+    # round the size down to a multiple of 128
+    width, height = img_raw.size
+    new_width = width // 128 * 128
+    new_height = height // 128 * 128
+    img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
+    # pre-processing
+    img = transform(img_raw)
+
+    samples = torch.Tensor(img).unsqueeze(0)
+    samples = samples.to(device)
+    # run inference
+    outputs = model(samples)
+    outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+    outputs_points = outputs['pred_points'][0]
+
+    threshold = 0.5
+    # filter the predictions
+    points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
+    predict_cnt = int((outputs_scores > threshold).sum())
+
+    outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+    outputs_points = outputs['pred_points'][0]
+    # draw the predictions
+    size = 2
+    img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
+    for p in points:
+        img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
+    # save the visualized image
+    cv2.imwrite(img_file, img_to_draw)
+    return predict_cnt
+
+
+def video_reader(videoFile):
+    cap = cv2.VideoCapture(videoFile)
+    while(cap.isOpened()):
+        ret, cv2_im = cap.read()
+        if ret:
+            converted = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
+            pil_im = Image.fromarray(converted)
+            yield pil_im
+
+        elif not ret:
+            break
+    cap.release()
+
+
+def video_inference(model, transform, device, video_file):
+    result = []
+    for frame in tqdm(video_reader(video_file)):
+        img_raw = frame
+        # round the size down to a multiple of 128
+        width, height = img_raw.size
+        new_width = width // 128 * 128
+        new_height = height // 128 * 128
+        img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
+        frames_size = (new_width, new_height)
+        # pre-processing
+        img = transform(img_raw)
+
+        samples = torch.Tensor(img).unsqueeze(0)
+        samples = samples.to(device)
+        # run inference
+        outputs = model(samples)
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+
+        threshold = 0.5
+        # filter the predictions
+        points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
+        predict_cnt = int((outputs_scores > threshold).sum())
+
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+        # draw the predictions
+        size = 10
+        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
+        for p in points:
+            img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
+        # save the visualized image
+        # cv2.imwrite(os.path.join(args.output_dir, 'pred{}.jpg'.format(predict_cnt)), img_to_draw)
+        # break
+        if result:
+            result.write(img_to_draw)
+            break
+        else:
+            result = cv2.VideoWriter(f'{video_file}.avi',
+                                     cv2.VideoWriter_fourcc(*'MJPG'),
+                                     10, frames_size)
+            result.write(img_to_draw)
+    result.release()
+    return True
diff --git a/logs/pred1248.jpg b/logs/pred1248.jpg
new file mode 100644
index 00000000..959bb5c1
Binary files /dev/null and b/logs/pred1248.jpg differ
diff --git a/logs/pred42.jpg b/logs/pred42.jpg
new file mode 100644
index 00000000..469e8a9b
Binary files /dev/null and b/logs/pred42.jpg differ
diff --git a/logs/pred91.jpg b/logs/pred91.jpg
new file mode 100644
index 00000000..eed671de
Binary files /dev/null and b/logs/pred91.jpg differ
diff --git a/models/__pycache__/__init__.cpython-39.pyc b/models/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 00000000..00f916a3
Binary files /dev/null and b/models/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/__pycache__/backbone.cpython-39.pyc b/models/__pycache__/backbone.cpython-39.pyc
new file mode 100644
index 00000000..7ca56989
Binary files /dev/null and b/models/__pycache__/backbone.cpython-39.pyc differ
diff --git a/models/__pycache__/matcher.cpython-39.pyc b/models/__pycache__/matcher.cpython-39.pyc
new file mode 100644
index 00000000..e2dfefe0
Binary files /dev/null and b/models/__pycache__/matcher.cpython-39.pyc differ
diff --git a/models/__pycache__/p2pnet.cpython-39.pyc b/models/__pycache__/p2pnet.cpython-39.pyc
new file mode 100644
index 00000000..9f18864a
Binary files /dev/null and b/models/__pycache__/p2pnet.cpython-39.pyc differ
diff --git a/models/__pycache__/vgg_.cpython-39.pyc b/models/__pycache__/vgg_.cpython-39.pyc
new file mode 100644
index 00000000..8b2066ed
Binary files /dev/null and b/models/__pycache__/vgg_.cpython-39.pyc differ
diff --git a/models/vgg_.py b/models/vgg_.py
index 130083df..d4b3db73 100644
--- a/models/vgg_.py
+++ b/models/vgg_.py
@@ -25,8 +25,8 @@
 model_paths = {
-    'vgg16_bn': '/apdcephfs/private_changanwang/checkpoints/vgg16_bn-6c64b313.pth',
-    'vgg16': '/apdcephfs/private_changanwang/checkpoints/vgg16-397923af.pth',
+    'vgg16_bn': './weights/vgg16_bn-6c64b313.pth',
+    'vgg16': './weights/vgg16-397923af.pth',
 }
diff --git a/requirements.txt b/requirements.txt
index 43646a23..01de3fb0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,7 @@ numpy
 scipy
 matplotlib
 Pillow
-opencv-python
\ No newline at end of file
+opencv-python
+tqdm
+flask
+gunicorn
\ No newline at end of file
diff --git a/run_test.py b/run_test.py
index 12c8e36e..235c9360 100644
--- a/run_test.py
+++ b/run_test.py
@@ -43,7 +43,7 @@ def main(args, debug=False):
     os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
 
     print(args)
-    device = torch.device('cuda')
+    device = torch.device('cpu')
     # get the P2PNet
     model = build_model(args)
     # move to GPU
diff --git a/templates/upload.html b/templates/upload.html
new file mode 100644
index 00000000..c5374722
--- /dev/null
+++ b/templates/upload.html
@@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Document</title>
+</head>
+<body>
+    <h1>Crowd-Counting-P2P</h1>
+    <br>
+    <br>
+    <h3>Image Inference:</h3>
+    <br>
+    <form action="/image" method="POST" enctype="multipart/form-data">
+        <input type="file" name="file">
+        <input type="submit" value="Upload">
+    </form>
+    <br>
+    <br>
+    <h3>Video Inference:</h3>
+    <br>
+    <form action="/video" method="POST" enctype="multipart/form-data">
+        <input type="file" name="file">
+        <input type="submit" value="Upload">
+    </form>
+    <br>
+    <br>
+</body>
+</html>
diff --git a/util/__pycache__/__init__.cpython-39.pyc b/util/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 00000000..e30dc58e
Binary files /dev/null and b/util/__pycache__/__init__.cpython-39.pyc differ
diff --git a/util/__pycache__/misc.cpython-39.pyc b/util/__pycache__/misc.cpython-39.pyc
new file mode 100644
index 00000000..2699849c
Binary files /dev/null and b/util/__pycache__/misc.cpython-39.pyc differ
diff --git a/util/misc.py b/util/misc.py
index 8a67c260..7cfe7d73 100644
--- a/util/misc.py
+++ b/util/misc.py
@@ -22,9 +22,9 @@
 
 # needed due to empty tensor bug in pytorch and torchvision 0.5
 import torchvision
-if float(torchvision.__version__[:3]) < 0.7:
-    from torchvision.ops import _new_empty_tensor
-    from torchvision.ops.misc import _output_size
+# if float(torchvision.__version__[:3]) < 0.7:
+#     from torchvision.ops import _new_empty_tensor
+#     from torchvision.ops.misc import _output_size
 
 
 class SmoothedValue(object):
diff --git a/video_inference.py b/video_inference.py
new file mode 100644
index 00000000..80ac4c2e
--- /dev/null
+++ b/video_inference.py
@@ -0,0 +1,130 @@
+import argparse
+import datetime
+import random
+import time
+from pathlib import Path
+from tqdm import tqdm
+
+import torch
+import torchvision.transforms as standard_transforms
+import numpy as np
+
+from PIL import Image
+import cv2
+from crowd_datasets import build_dataset
+from engine import *
+from models import build_model
+import os
+import warnings
+warnings.filterwarnings('ignore')
+
+def get_args_parser():
+    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)
+
+    # * Backbone
+    parser.add_argument('--backbone', default='vgg16_bn', type=str,
+                        help="name of the convolutional backbone to use")
+
+    parser.add_argument('--input_video', default='../Video-tests/test1.mp4', type=str,
+                        help="address of input video file")
+
+    parser.add_argument('--row', default=2, type=int,
+                        help="row number of anchor points")
+    parser.add_argument('--line', default=2, type=int,
+                        help="line number of anchor points")
+
+    parser.add_argument('--output_dir', default='./logs/',
+                        help='path where to save')
+    parser.add_argument('--weight_path', default='./weights/SHTechA.pth',
+                        help='path where the trained weights are saved')
+
+    parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for evaluation')
+
+    return parser
+
+def load_model(args):
+    os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
+
+    print(args)
+    device = torch.device('cpu')
+    # get the P2PNet
+    model = build_model(args)
+    # move the model to the selected device
+    model.to(device)
+    # load trained model
+    if args.weight_path is not None:
+        checkpoint = torch.load(args.weight_path, map_location='cpu')
+        model.load_state_dict(checkpoint['model'])
+    # convert to eval mode
+    model.eval()
+    # create the pre-processing transform
+    transform = standard_transforms.Compose([
+        standard_transforms.ToTensor(),
+        standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return model, transform, device
+def video_reader(videoFile):
+    cap = cv2.VideoCapture(videoFile)
+    while(cap.isOpened()):
+        ret, cv2_im = cap.read()
+        if ret:
+            converted = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
+            pil_im = Image.fromarray(converted)
+            yield pil_im
+
+        elif not ret:
+            break
+    cap.release()
+
+
+def main(args, debug=False):
+    result = []
+    model, transform, device = load_model(args)
+    for frame in tqdm(video_reader(args.input_video)):
+        img_raw = frame
+        # round the size down to a multiple of 128
+        width, height = img_raw.size
+        new_width = width // 128 * 128
+        new_height = height // 128 * 128
+        img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
+        frames_size = (new_width, new_height)
+        # pre-processing
+        img = transform(img_raw)
+
+        samples = torch.Tensor(img).unsqueeze(0)
+        samples = samples.to(device)
+        # run inference
+        outputs = model(samples)
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+
+        threshold = 0.5
+        # filter the predictions
+        points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
+        predict_cnt = int((outputs_scores > threshold).sum())
+
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+        # draw the predictions
+        size = 10
+        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
+        for p in points:
+            img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
+        # save the visualized image
+        # cv2.imwrite(os.path.join(args.output_dir, 'pred{}.jpg'.format(predict_cnt)), img_to_draw)
+        # break
+        if result:
+            result.write(img_to_draw)
+        else:
+            result = cv2.VideoWriter(f'{args.output_dir}pred_{args.input_video}.avi',
+                                     cv2.VideoWriter_fourcc(*'MJPG'),
+                                     10, frames_size)
+            result.write(img_to_draw)
+    result.release()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
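
Note: the Procfile starts "gunicorn wsgi:app", but no wsgi.py is included in this patch. A minimal sketch of the missing entry point, assuming it only needs to re-export the Flask app object defined in app.py (hypothetical file, not part of the diff):

    # wsgi.py -- hypothetical gunicorn entry point; re-exports the Flask app from app.py
    from app import app

    if __name__ == "__main__":
        # fall back to the Flask development server when run directly
        app.run()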
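
For a quick local smoke test of the upload routes, a sketch using the requests library (assuming the server is running at Flask's default http://127.0.0.1:5000 via "python app.py", and that a test.jpg exists in the working directory; neither is part of the patch):

    # smoke_test.py -- hypothetical client for the /image route
    import requests

    # the multipart field must be named "file", matching request.files['file'] in app.py
    with open("test.jpg", "rb") as fh:
        resp = requests.post("http://127.0.0.1:5000/image",
                             files={"file": ("test.jpg", fh, "image/jpeg")})
    resp.raise_for_status()

    # the route responds with the annotated image, so write the bytes out for inspection
    with open("test_annotated.jpg", "wb") as out:
        out.write(resp.content)
    print("saved", len(resp.content), "bytes")

The /video route works the same way but returns an MJPG-encoded .avi (the uploaded filename with '.avi' appended).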