diff --git a/Procfile b/Procfile
new file mode 100644
index 00000000..fc272ab9
--- /dev/null
+++ b/Procfile
@@ -0,0 +1 @@
+web: gunicorn wsgi:app
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 00000000..91986221
--- /dev/null
+++ b/app.py
@@ -0,0 +1,44 @@
+from time import sleep
+from flask import Flask, render_template, request, send_file
+from werkzeug.utils import secure_filename
+from werkzeug.datastructures import FileStorage
+import cv2
+import os
+import glob
+import inference_flask as util
+app = Flask(__name__)
+
+model, transform, device = util.load_model()
+
+@app.route('/')
+def r_upload_file():
+    return render_template('upload.html')
+
+@app.route('/image', methods = ['GET', 'POST'])
+def image():
+    global model, transform, device
+    for file in glob.glob('./*'):
+        if file.endswith('.jpg') or file.endswith('.png') or file.endswith('jpeg'):
+            os.remove(file)
+    if request.method == 'POST':
+        f = request.files['file']
+        f.save(secure_filename(f.filename))
+        # inference
+        util.image_inference(model, transform, device, secure_filename(f.filename))
+        return send_file(secure_filename(f.filename))
+
+@app.route('/video', methods = ['GET', 'POST'])
+def video():
+    global model, transform, device
+    for file in glob.glob('./*'):
+        if file.endswith('.mp4') or file.endswith('.avi'):
+            os.remove(file)
+    if request.method == 'POST':
+        f = request.files['file']
+        f.save(secure_filename(f.filename))
+        # inference
+        util.video_inference(model, transform, device, secure_filename(f.filename))
+        return send_file(secure_filename(f.filename)+'.avi')
+
+if __name__ == '__main__':
+    app.run(debug = False)
\ No newline at end of file
diff --git a/crowd_datasets/__pycache__/__init__.cpython-39.pyc b/crowd_datasets/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 00000000..2d372275
Binary files /dev/null and b/crowd_datasets/__pycache__/__init__.cpython-39.pyc differ
diff --git a/inference_flask.py b/inference_flask.py
new file mode 100644
index 00000000..45063371
--- /dev/null
+++ b/inference_flask.py
@@ -0,0 +1,168 @@
+import argparse
+import datetime
+import random
+import time
+from pathlib import Path
+from tqdm import tqdm
+
+import torch
+import torchvision.transforms as standard_transforms
+import numpy as np
+
+from PIL import Image
+import cv2
+from crowd_datasets import build_dataset
+from engine import *
+from models import build_model
+import os
+import warnings
+warnings.filterwarnings('ignore')
+
+def get_args_parser():
+    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)
+
+    # * Backbone
+    parser.add_argument('--backbone', default='vgg16_bn', type=str,
+                        help="name of the convolutional backbone to use")
+
+    parser.add_argument('--input_video', default='../Video-tests/test1.mp4', type=str,
+                        help="address of input video file")
+
+    parser.add_argument('--row', default=2, type=int,
+                        help="row number of anchor points")
+    parser.add_argument('--line', default=2, type=int,
+                        help="line number of anchor points")
+
+    parser.add_argument('--output_dir', default='./logs/',
+                        help='path where to save')
+    parser.add_argument('--weight_path', default='./weights/SHTechA.pth',
+                        help='path where the trained weights are saved')
+
+    parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for evaluation')
+
+    return parser
+
+def load_model():
+    parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
+    args = parser.parse_args()
+    # os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
+
+    # print(args)
+    device = torch.device('cpu')
+    # get the P2PNet
+    model = build_model(args)
+    # move the model to the selected device
+    model.to(device)
+    # load trained model
+    if args.weight_path is not None:
+        checkpoint = torch.load(args.weight_path, map_location='cpu')
+        model.load_state_dict(checkpoint['model'])
+    # convert to eval mode
+    model.eval()
+    # create the pre-processing transform
+    transform = standard_transforms.Compose([
+        standard_transforms.ToTensor(),
+        standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return model, transform, device
+
+def image_inference(model, transform, device, img_file):
+    # set your image path here
+    img_path = img_file
+    # load the image
+    img_raw = Image.open(img_path).convert('RGB')
+    # round the size down to a multiple of 128
+    width, height = img_raw.size
+    new_width = width // 128 * 128
+    new_height = height // 128 * 128
+    img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
+    # pre-processing
+    img = transform(img_raw)
+
+    samples = torch.Tensor(img).unsqueeze(0)
+    samples = samples.to(device)
+    # run inference
+    outputs = model(samples)
+    outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+    outputs_points = outputs['pred_points'][0]
+
+    threshold = 0.5
+    # filter the predictions
+    points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
+    predict_cnt = int((outputs_scores > threshold).sum())
+
+    outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+    outputs_points = outputs['pred_points'][0]
+    # draw the predictions
+    size = 2
+    img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
+    for p in points:
+        img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
+    # save the visualized image
+    cv2.imwrite(img_file, img_to_draw)
+    return predict_cnt
+
+
+def video_reader(videoFile):
+    cap = cv2.VideoCapture(videoFile)
+    while(cap.isOpened()):
+        ret, cv2_im = cap.read()
+        if ret:
+            converted = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
+            pil_im = Image.fromarray(converted)
+            yield pil_im
+
+        elif not ret:
+            break
+    cap.release()
+
+
+def video_inference(model, transform, device, video_file):
+    result = []
+    for frame in tqdm(video_reader(video_file)):
+        img_raw = frame
+        # round the size down to a multiple of 128
+        width, height = img_raw.size
+        new_width = width // 128 * 128
+        new_height = height // 128 * 128
+        img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
+        frames_size = (new_width, new_height)
+        # pre-processing
+        img = transform(img_raw)
+
+        samples = torch.Tensor(img).unsqueeze(0)
+        samples = samples.to(device)
+        # run inference
+        outputs = model(samples)
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+
+        threshold = 0.5
+        # filter the predictions
+        points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
+        predict_cnt = int((outputs_scores > threshold).sum())
+
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+        # draw the predictions
+        size = 10
+        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
+        for p in points:
+            img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
+        # save the visualized image
+        # cv2.imwrite(os.path.join(args.output_dir, 'pred{}.jpg'.format(predict_cnt)), img_to_draw)
+        # break
+        if result:
+            result.write(img_to_draw)
+            break
+        else:
+            result = cv2.VideoWriter(f'{video_file}.avi',
+                                     cv2.VideoWriter_fourcc(*'MJPG'),
+                                     10, frames_size)
+            result.write(img_to_draw)
+    result.release()
+    return True
diff --git a/logs/pred1248.jpg b/logs/pred1248.jpg
new file mode 100644
index 00000000..959bb5c1
Binary files /dev/null and b/logs/pred1248.jpg differ
diff --git a/logs/pred42.jpg b/logs/pred42.jpg
new file mode 100644
index 00000000..469e8a9b
Binary files /dev/null and b/logs/pred42.jpg differ
diff --git a/logs/pred91.jpg b/logs/pred91.jpg
new file mode 100644
index 00000000..eed671de
Binary files /dev/null and b/logs/pred91.jpg differ
diff --git a/models/__pycache__/__init__.cpython-39.pyc b/models/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 00000000..00f916a3
Binary files /dev/null and b/models/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/__pycache__/backbone.cpython-39.pyc b/models/__pycache__/backbone.cpython-39.pyc
new file mode 100644
index 00000000..7ca56989
Binary files /dev/null and b/models/__pycache__/backbone.cpython-39.pyc differ
diff --git a/models/__pycache__/matcher.cpython-39.pyc b/models/__pycache__/matcher.cpython-39.pyc
new file mode 100644
index 00000000..e2dfefe0
Binary files /dev/null and b/models/__pycache__/matcher.cpython-39.pyc differ
diff --git a/models/__pycache__/p2pnet.cpython-39.pyc b/models/__pycache__/p2pnet.cpython-39.pyc
new file mode 100644
index 00000000..9f18864a
Binary files /dev/null and b/models/__pycache__/p2pnet.cpython-39.pyc differ
diff --git a/models/__pycache__/vgg_.cpython-39.pyc b/models/__pycache__/vgg_.cpython-39.pyc
new file mode 100644
index 00000000..8b2066ed
Binary files /dev/null and b/models/__pycache__/vgg_.cpython-39.pyc differ
diff --git a/models/vgg_.py b/models/vgg_.py
index 130083df..d4b3db73 100644
--- a/models/vgg_.py
+++ b/models/vgg_.py
@@ -25,8 +25,8 @@
 model_paths = {
-    'vgg16_bn': '/apdcephfs/private_changanwang/checkpoints/vgg16_bn-6c64b313.pth',
-    'vgg16': '/apdcephfs/private_changanwang/checkpoints/vgg16-397923af.pth',
+    'vgg16_bn': './weights/vgg16_bn-6c64b313.pth',
+    'vgg16': './weights/vgg16-397923af.pth',
 }
diff --git a/requirements.txt b/requirements.txt
index 43646a23..01de3fb0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,7 @@ numpy
 scipy
 matplotlib
 Pillow
-opencv-python
\ No newline at end of file
+opencv-python
+tqdm
+flask
+gunicorn
\ No newline at end of file
diff --git a/run_test.py b/run_test.py
index 12c8e36e..235c9360 100644
--- a/run_test.py
+++ b/run_test.py
@@ -43,7 +43,7 @@ def main(args, debug=False):
     os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
 
     print(args)
-    device = torch.device('cuda')
+    device = torch.device('cpu')
     # get the P2PNet
     model = build_model(args)
     # move to GPU
diff --git a/templates/upload.html b/templates/upload.html
new file mode 100644
index 00000000..c5374722
--- /dev/null
+++ b/templates/upload.html
@@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Document</title>
+</head>
+<body>
+    <h1>Crowd-Counting-P2P</h1>
+    <br>
+    <br>
+    <h3>Image Inference:</h3>
+    <br>
+    <form action="/image" method="POST" enctype="multipart/form-data">
+        <input type="file" name="file">
+        <input type="submit" value="Upload">
+    </form>
+    <br>
+    <br>
+    <h3>Video Inference:</h3>
+    <br>
+    <form action="/video" method="POST" enctype="multipart/form-data">
+        <input type="file" name="file">
+        <input type="submit" value="Upload">
+    </form>
+    <br>
+    <br>
+</body>
+</html>
diff --git a/util/__pycache__/__init__.cpython-39.pyc b/util/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 00000000..e30dc58e
Binary files /dev/null and b/util/__pycache__/__init__.cpython-39.pyc differ
diff --git a/util/__pycache__/misc.cpython-39.pyc b/util/__pycache__/misc.cpython-39.pyc
new file mode 100644
index 00000000..2699849c
Binary files /dev/null and b/util/__pycache__/misc.cpython-39.pyc differ
diff --git a/util/misc.py b/util/misc.py
index 8a67c260..7cfe7d73 100644
--- a/util/misc.py
+++ b/util/misc.py
@@ -22,9 +22,9 @@
 
 # needed due to empty tensor bug in pytorch and torchvision 0.5
 import torchvision
-if float(torchvision.__version__[:3]) < 0.7:
-    from torchvision.ops import _new_empty_tensor
-    from torchvision.ops.misc import _output_size
+# if float(torchvision.__version__[:3]) < 0.7:
+#     from torchvision.ops import _new_empty_tensor
+#     from torchvision.ops.misc import _output_size
 
 
 class SmoothedValue(object):
diff --git a/video_inference.py b/video_inference.py
new file mode 100644
index 00000000..80ac4c2e
--- /dev/null
+++ b/video_inference.py
@@ -0,0 +1,130 @@
+import argparse
+import datetime
+import random
+import time
+from pathlib import Path
+from tqdm import tqdm
+
+import torch
+import torchvision.transforms as standard_transforms
+import numpy as np
+
+from PIL import Image
+import cv2
+from crowd_datasets import build_dataset
+from engine import *
+from models import build_model
+import os
+import warnings
+warnings.filterwarnings('ignore')
+
+def get_args_parser():
+    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)
+
+    # * Backbone
+    parser.add_argument('--backbone', default='vgg16_bn', type=str,
+                        help="name of the convolutional backbone to use")
+
+    parser.add_argument('--input_video', default='../Video-tests/test1.mp4', type=str,
+                        help="address of input video file")
+
+    parser.add_argument('--row', default=2, type=int,
+                        help="row number of anchor points")
+    parser.add_argument('--line', default=2, type=int,
+                        help="line number of anchor points")
+
+    parser.add_argument('--output_dir', default='./logs/',
+                        help='path where to save')
+    parser.add_argument('--weight_path', default='./weights/SHTechA.pth',
+                        help='path where the trained weights are saved')
+
+    parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for evaluation')
+
+    return parser
+
+def load_model(args):
+    os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
+
+    print(args)
+    device = torch.device('cpu')
+    # get the P2PNet
+    model = build_model(args)
+    # move the model to the selected device
+    model.to(device)
+    # load trained model
+    if args.weight_path is not None:
+        checkpoint = torch.load(args.weight_path, map_location='cpu')
+        model.load_state_dict(checkpoint['model'])
+    # convert to eval mode
+    model.eval()
+    # create the pre-processing transform
+    transform = standard_transforms.Compose([
+        standard_transforms.ToTensor(),
+        standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return model, transform, device
+def video_reader(videoFile):
+    cap = cv2.VideoCapture(videoFile)
+    while(cap.isOpened()):
+        ret, cv2_im = cap.read()
+        if ret:
+            converted = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
+            pil_im = Image.fromarray(converted)
+            yield pil_im
+
+        elif not ret:
+            break
+    cap.release()
+
+
+def main(args, debug=False):
+    result = []
+    model, transform, device = load_model(args)
+    for frame in tqdm(video_reader(args.input_video)):
+        img_raw = frame
+        # round the size down to a multiple of 128
+        width, height = img_raw.size
+        new_width = width // 128 * 128
+        new_height = height // 128 * 128
+        img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
+        frames_size = (new_width, new_height)
+        # pre-processing
+        img = transform(img_raw)
+
+        samples = torch.Tensor(img).unsqueeze(0)
+        samples = samples.to(device)
+        # run inference
+        outputs = model(samples)
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+
+        threshold = 0.5
+        # filter the predictions
+        points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
+        predict_cnt = int((outputs_scores > threshold).sum())
+
+        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
+
+        outputs_points = outputs['pred_points'][0]
+        # draw the predictions
+        size = 10
+        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
+        for p in points:
+            img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
+        # save the visualized image
+        # cv2.imwrite(os.path.join(args.output_dir, 'pred{}.jpg'.format(predict_cnt)), img_to_draw)
+        # break
+        if result:
+            result.write(img_to_draw)
+        else:
+            result = cv2.VideoWriter(f'{args.output_dir}pred_{args.input_video}.avi',
+                                     cv2.VideoWriter_fourcc(*'MJPG'),
+                                     10, frames_size)
+            result.write(img_to_draw)
+    result.release()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
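
Note: the Procfile starts "gunicorn wsgi:app", but no wsgi.py is included in this patch. A minimal sketch of the missing entry point, assuming it only needs to re-export the Flask app object defined in app.py (hypothetical file, not part of the diff):

    # wsgi.py -- hypothetical gunicorn entry point; re-exports the Flask app from app.py
    from app import app

    if __name__ == "__main__":
        # fall back to the Flask development server when run directly
        app.run()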
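
For a quick local smoke test of the upload routes, a sketch using the requests library (assuming the server is running at Flask's default http://127.0.0.1:5000 via "python app.py", and that a test.jpg exists in the working directory; neither is part of the patch):

    # smoke_test.py -- hypothetical client for the /image route
    import requests

    # the multipart field must be named "file", matching request.files['file'] in app.py
    with open("test.jpg", "rb") as fh:
        resp = requests.post("http://127.0.0.1:5000/image",
                             files={"file": ("test.jpg", fh, "image/jpeg")})
    resp.raise_for_status()

    # the route responds with the annotated image, so write the bytes out for inspection
    with open("test_annotated.jpg", "wb") as out:
        out.write(resp.content)
    print("saved", len(resp.content), "bytes")

The /video route works the same way but returns an MJPG-encoded .avi (the uploaded filename with '.avi' appended).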