diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..0015c6b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+Dockerfile
+README.md
+*.pyc
+*.pyo
+*.pyd
+__pycache__
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 6862f4f..defe269 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,4 +103,6 @@ venv.bak/
 # mypy
 .mypy_cache/
-.vscode/
\ No newline at end of file
+.vscode/
+
+*.hdf5
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..21d69a5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.7-slim
+LABEL maintainer="Manabu TERADA"
+
+RUN apt-get update -y
+RUN apt-get install -y build-essential libssl-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev libreadline-gplv2-dev libpq-dev
+
+RUN mkdir /code
+RUN mkdir /code/log
+WORKDIR /code
+
+RUN pip install -U pip setuptools
+ADD requirements.txt .
+ADD . /code/
+RUN pip install -e . -c requirements.txt
+
+#RUN pip freeze > /work/requirements.txt
+
+EXPOSE 8080
+CMD ["uwsgi", "--emperor", "/code/uwsgi.ini", "--logto", "/code/log/emperor.log"]
+# CMD ["/bin/sh"]
\ No newline at end of file
diff --git a/Dockerfile.tf b/Dockerfile.tf
new file mode 100644
index 0000000..de9a463
--- /dev/null
+++ b/Dockerfile.tf
@@ -0,0 +1,17 @@
+FROM tensorflow/tensorflow:1.12.0-py3
+
+RUN mkdir /code
+RUN mkdir /code/log
+WORKDIR /code
+
+RUN pip install -U pip setuptools
+ADD requirements-detector.txt .
+ADD . /code/
+RUN pip install -e . -c requirements-detector.txt
+
+#RUN pip freeze > /work/requirements.txt
+
+# EXPOSE 8080
+CMD ["python", "mahjong_sample_web_app/detector/detector.py"]
+# CMD ["uwsgi", "--emperor", "/code/uwsgi.ini", "--logto", "/code/log/emperor.log"]
+# CMD ["/bin/sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index b656e13..7bdda17 100644
--- a/README.md
+++ b/README.md
@@ -37,3 +37,16 @@
 $ pip install -e .["test"]
 $ pytest -v
 ```
+
+# Docker environment
+
+```
+$ docker build . -t local/mahjong_sample
+$ docker run --rm -it -p 8080:8080 -v $PWD/log:/code/log local/mahjong_sample
+```
+
+```
+$ docker build -t local/mahjong_sample_tf -f Dockerfile.tf .
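+# this image's CMD runs the detector once via detector.py (see Dockerfile.tf)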
+$ docker run --rm -it local/mahjong_sample_tf +``` \ No newline at end of file diff --git a/mahjong_sample_web_app/detector/__init__.py b/mahjong_sample_web_app/detector/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mahjong_sample_web_app/detector/detector.py b/mahjong_sample_web_app/detector/detector.py new file mode 100644 index 0000000..cfe0518 --- /dev/null +++ b/mahjong_sample_web_app/detector/detector.py @@ -0,0 +1,83 @@ +from pathlib import Path +import numpy as np + +# from scipy.misc import imresize +from PIL import Image as pil_image +from keras.preprocessing import image +from postprocess import PostProcess +from ssd.ssd import SingleShotMultiBoxDetector + + +model_file = Path(__file__).parent / "models" / "weights.25-0.05.hdf5" +param_file = ( + Path(__file__).parent / "models" / "ssd300_params_mahjong_vgg16_train_2.json" +) + + +def model_build(model_file, param_file): + ssd = SingleShotMultiBoxDetector( + overlap_threshold=0.5, nms_threshold=0.45, max_output_size=400 + ) + ssd.load_parameters(str(param_file)) + ssd.build(init_weight=str(model_file)) + return ssd + + +def _add_margin(img): + img_shape = list(img.shape) + if img_shape[0] == img_shape[1]: + return img + if img_shape[0] < img_shape[1]: + min_arg = 0 + max_arg = 1 + else: + min_arg = 1 + max_arg = 0 + margin_shape = img_shape + margin_shape[min_arg] = int((img_shape[max_arg] - img_shape[min_arg]) / 2.0) + margin = np.tile([0.0], margin_shape) + new_img = np.concatenate([margin, img], axis=min_arg) + new_img = np.concatenate([new_img, margin], axis=min_arg) + return new_img + + +def pred(ssd, img): + inputs = np.array([img.copy()]) + results = ssd.detect(inputs, batch_size=1, verbose=1, do_preprocess=True) + return results + + +def load_image(img_obj, input_shape=(512, 512)): + # img = image.load_img(img_path) + img = pil_image.open(img_obj) + if img.mode != "RGB": + img = img.convert("RGB") + + img_array = image.img_to_array(img) + new_img = _add_margin(img_array) + new_img_float = np.array( + pil_image.fromarray(new_img.astype("uint8")).resize( + size=input_shape # , resample=pil_image.BICUBIC + ) + ).astype("float32") + # new_img_float = imresize(new_img, input_shape).astype("float32") + return new_img_float + + +def detect(img_obj): + img = load_image(img_obj) + ssd = model_build(model_file, param_file) + pred_result = pred(ssd, img) + + pp = PostProcess(ssd.class_names, pred_threshold=0.9) + pp.set_top_score(pred_result) + list_label = pp.get_list_pi() + # pp.save_image(img, pred_result, savepath) + # print(list_label) + return list_label + + +if __name__ == "__main__": + ssd = model_build(model_file, param_file) + print(ssd) + diff --git a/mahjong_sample_web_app/detector/models/ssd300_params_mahjong_vgg16_train_2.json b/mahjong_sample_web_app/detector/models/ssd300_params_mahjong_vgg16_train_2.json new file mode 100644 index 0000000..90488df --- /dev/null +++ b/mahjong_sample_web_app/detector/models/ssd300_params_mahjong_vgg16_train_2.json @@ -0,0 +1,114 @@ +{ + "aspect_ratios": [ + [ + 2.0, + 0.5 + ], + [ + 2.0, + 0.5, + 3.0, + 0.3333333333333333 + ], + [ + 2.0, + 0.5, + 3.0, + 0.3333333333333333 + ], + [ + 2.0, + 0.5, + 3.0, + 0.3333333333333333 + ], + [ + 2.0, + 0.5, + 3.0, + 0.3333333333333333 + ], + [ + 2.0, + 0.5, + 3.0, + 0.3333333333333333 + ] + ], + "base_net": "vgg16", + "class_names": [ + "bg", + "1m", + "2m", + "3m", + "4m", + "5m", + "6m", + "7m", + "8m", + "9m", + "1p", + "2p", + "3p", + "4p", + "5p", + "6p", + "7p", + "8p", + "9p", + "1s", + "2s", + "3s", + "4s", + "5s", 
+ "6s", + "7s", + "8s", + "9s", + "c", + "e", + "f", + "h", + "n", + "s", + "w" + ], + "input_shape": [ + 512, + 512, + 3 + ], + "model_type": "ssd300", + "n_classes": 35, + "scales": [ + [ + 30.0 + ], + [ + 60.0, + 114.0 + ], + [ + 114.0, + 168.0 + ], + [ + 168.0, + 222.0 + ], + [ + 222.0, + 276.0 + ], + [ + 276.0, + 330.0 + ] + ], + "variances": [ + 0.1, + 0.1, + 0.2, + 0.2 + ] +} \ No newline at end of file diff --git a/mahjong_sample_web_app/detector/postprocess.py b/mahjong_sample_web_app/detector/postprocess.py new file mode 100644 index 0000000..9425199 --- /dev/null +++ b/mahjong_sample_web_app/detector/postprocess.py @@ -0,0 +1,71 @@ +# import matplotlib.pyplot as plt +import numpy as np + + +class PostProcess: + def __init__(self, class_names, pred_threshold=0.9): + self.pred_threshold = pred_threshold + self.class_names = class_names + + def set_top_score(self, pred_result): + # Parse the outputs. + det_label = pred_result[0][:, 0] + det_conf = pred_result[0][:, 1] + + # get top score result + self.top_indices = [ + i for i, conf in enumerate(det_conf) if conf >= self.pred_threshold + ] + self.top_conf = det_conf[self.top_indices] + self.top_label_indices = det_label[self.top_indices].tolist() + + def get_list_pi(self): + list_label = [] + for i in range(self.top_conf.shape[0]): + label = int(self.top_label_indices[i]) + label_name = self.class_names[label] + list_label.append(label_name) + + return list_label + + # def save_image(self, img, pred_result, savepath): + # colors = plt.cm.hsv(np.linspace(0, 1, 35)).tolist() + # plt.tick_params( + # labelbottom=False, labelleft=False, labelright=False, labeltop=False + # ) + # plt.tick_params(bottom=False, left=False, right=False, top=False) + + # plt.imshow(img / 255.0) + # currentAxis = plt.gca() + + # det_xmin = pred_result[0][:, 2] + # det_ymin = pred_result[0][:, 3] + # det_xmax = pred_result[0][:, 4] + # det_ymax = pred_result[0][:, 5] + + # top_xmin = det_xmin[self.top_indices] + # top_ymin = det_ymin[self.top_indices] + # top_xmax = det_xmax[self.top_indices] + # top_ymax = det_ymax[self.top_indices] + + # for i in range(self.top_conf.shape[0]): + # xmin = int(round(top_xmin[i] * img.shape[1])) + # ymin = int(round(top_ymin[i] * img.shape[0])) + # xmax = int(round(top_xmax[i] * img.shape[1])) + # ymax = int(round(top_ymax[i] * img.shape[0])) + + # label = int(self.top_label_indices[i]) + # score = self.top_conf[i] + # label_name = self.class_names[label] + # display_txt = "{:0.2f}, {}".format(score, label_name) + # coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1 + # color = colors[label] + + # currentAxis.add_patch( + # plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2) + # ) + # currentAxis.text( + # xmin, ymin, display_txt, bbox={"facecolor": color, "alpha": 1.0} + # ) + + # plt.savefig(savepath) diff --git a/mahjong_sample_web_app/detector/ssd/__init__.py b/mahjong_sample_web_app/detector/ssd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mahjong_sample_web_app/detector/ssd/layers.py b/mahjong_sample_web_app/detector/ssd/layers.py new file mode 100755 index 0000000..f02f471 --- /dev/null +++ b/mahjong_sample_web_app/detector/ssd/layers.py @@ -0,0 +1,33 @@ +import numpy as np +import keras.backend as K +from keras.engine.topology import Layer +from keras.engine.topology import InputSpec + + +class L2Normalization(Layer): + """ + """ + + def __init__(self, scale, **kwargs): + self.scale = scale + self.gamma = None + self.axis = None + # if K.image_dim_ordering() == "tf": + if 
K.image_data_format() == "channels_last":  # TensorFlow backend ordering
+            self.axis = 3
+        else:
+            self.axis = 1
+        super(L2Normalization, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.input_spec = [InputSpec(shape=input_shape)]
+        shape = (input_shape[self.axis],)
+        self.gamma = K.variable(
+            self.scale * np.ones(shape), name="{}_gamma".format(self.name)
+        )
+        self.trainable_weights = [self.gamma]
+
+    def call(self, x, mask=None):
+        output = K.l2_normalize(x, self.axis)
+        output *= self.gamma
+        return output
diff --git a/mahjong_sample_web_app/detector/ssd/losses.py b/mahjong_sample_web_app/detector/ssd/losses.py
new file mode 100755
index 0000000..76f411f
--- /dev/null
+++ b/mahjong_sample_web_app/detector/ssd/losses.py
@@ -0,0 +1,139 @@
+import tensorflow as tf
+from keras.losses import categorical_crossentropy
+
+
+class MultiBoxLoss:
+    """
+    """
+    def __init__(self, n_classes, alpha=1.0, neg_pos_ratio=3.0,
+                 negatives_for_hard=100):
+        self.n_classes = n_classes
+        self.alpha = alpha
+        self.neg_pos_ratio = neg_pos_ratio
+        self.negatives_for_hard = negatives_for_hard
+
+    def _softmax_loss(self, y_true, y_pred):
+        """
+        """
+        softmax_loss = categorical_crossentropy(y_true, y_pred)
+        # y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)
+        # softmax_loss = -tf.reduce_sum(y_true * tf.log(y_pred), axis=-1)
+        return softmax_loss
+
+    def _l1_smooth_loss(self, y_true, y_pred):
+        """
+        """
+        abs_loss = tf.abs(y_true - y_pred)
+        sq_loss = 0.5 * (y_true - y_pred)**2
+        l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
+        return tf.reduce_sum(l1_loss, -1)
+
+    def compute_loss_old(self, y_true, y_pred):
+        """ compute loss
+        """
+        batch_size = tf.shape(y_true)[0]
+        num_boxes = tf.to_float(tf.shape(y_true)[1])
+
+        # loss for all default boxes
+        conf_loss = self._softmax_loss(y_true[:, :, 4:],
+                                       y_pred[:, :, 4:])
+        loc_loss = self._l1_smooth_loss(y_true[:, :, :4],
+                                        y_pred[:, :, :4])
+
+        # positives loss
+        num_pos = num_boxes - tf.reduce_sum(y_true[:, :, 4], axis=-1)
+        fpmask = 1 - y_true[:, :, 4]
+        pos_loc_loss = tf.reduce_sum(loc_loss * fpmask, axis=1)
+        pos_conf_loss = tf.reduce_sum(conf_loss * fpmask, axis=1)
+
+        # negatives loss
+        num_neg = tf.minimum(self.neg_pos_ratio * num_pos,
+                             num_boxes - num_pos)
+        pos_num_neg_mask = tf.greater(num_neg, 0)
+        has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask))
+        num_neg = tf.concat(axis=0,
+                            values=[num_neg,
+                                    [(1 - has_min) * self.negatives_for_hard]])
+        num_neg_batch = tf.reduce_min(tf.boolean_mask(num_neg,
+                                                      tf.greater(num_neg, 0)))
+        num_neg_batch = tf.to_int32(num_neg_batch)
+        confs_start = 4 + 1
+        confs_end = confs_start + self.n_classes - 1
+        max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end],
+                                  axis=2)
+
+        nvalues, indices = tf.nn.top_k(max_confs * y_true[:, :, 4],
+                                       k=num_neg_batch)
+
+        batch_idx = tf.expand_dims(tf.range(0, batch_size), 1)
+        batch_idx = tf.tile(batch_idx, (1, num_neg_batch))
+        full_indices = (tf.reshape(batch_idx, [-1]) * tf.to_int32(num_boxes) +
+                        tf.reshape(indices, [-1]))
+
+        neg_conf_loss = tf.gather(tf.reshape(conf_loss, [-1]),
+                                  full_indices)
+        neg_conf_loss = tf.reshape(neg_conf_loss,
+                                   [batch_size, num_neg_batch])
+        neg_conf_loss = tf.reduce_sum(neg_conf_loss, axis=1)
+
+        # loss is sum of positives and negatives
+        total_loss = pos_conf_loss + neg_conf_loss
+        total_loss /= (num_pos + tf.to_float(num_neg_batch))
+        num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
+                           tf.ones_like(num_pos))
+        total_loss += (self.alpha * pos_loc_loss) / num_pos
+        return total_loss
+
+    def compute_loss(self, y_true, y_pred):
+        """ 
compute loss + """ + batch_size = tf.shape(y_true)[0] + num_boxes = tf.to_float(tf.shape(y_true)[1]) + + # loss for all default boxes + conf_loss = self._softmax_loss(y_true[:, :, 4:], + y_pred[:, :, 4:]) + loc_loss = self._l1_smooth_loss(y_true[:, :, :4], + y_pred[:, :, :4]) + + # positives loss + num_pos = num_boxes - tf.reduce_sum(y_true[:, :, 4], axis=-1) + fpmask = 1 - y_true[:, :, 4] + pos_loc_loss = tf.reduce_sum(loc_loss * fpmask, axis=1) + pos_conf_loss = tf.reduce_sum(conf_loss * fpmask, axis=1) + + # negatives loss + num_neg = tf.minimum(self.neg_pos_ratio * num_pos, + num_boxes - num_pos) + pos_num_neg_mask = tf.greater(num_neg, 0) + has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask)) + num_neg = tf.concat(axis=0, + values=[num_neg, + [(1 - has_min) * self.negatives_for_hard]]) + num_neg_batch = tf.reduce_min(tf.boolean_mask(num_neg, + tf.greater(num_neg, 0))) + num_neg_batch = tf.to_int32(num_neg_batch) + confs_start = 4 + 1 + confs_end = confs_start + self.n_classes - 1 + max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end], + axis=2) + + nvalues, indices = tf.nn.top_k(max_confs * y_true[:, :, 4], + k=num_neg_batch) + min_nvalues = nvalues[:, -1] + min_nvalues = tf.expand_dims(min_nvalues, 1) + min_nvalues = tf.tile(min_nvalues, (1, tf.shape(max_confs)[1])) + nmask = tf.logical_not(tf.cast(fpmask, tf.bool)) + nmask = tf.logical_and(nmask, + tf.greater_equal(max_confs, min_nvalues)) + fnmask = tf.to_float(nmask) + + neg_conf_loss = tf.reduce_sum(conf_loss * fnmask, axis=1) + + # loss is sum of positives and negatives + total_loss = pos_conf_loss + neg_conf_loss + total_loss /= (num_pos + tf.to_float(num_neg_batch)) + num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos, + tf.ones_like(num_pos)) + total_loss += (self.alpha * pos_loc_loss) / num_pos + return total_loss diff --git a/mahjong_sample_web_app/detector/ssd/models.py b/mahjong_sample_web_app/detector/ssd/models.py new file mode 100755 index 0000000..a1c7597 --- /dev/null +++ b/mahjong_sample_web_app/detector/ssd/models.py @@ -0,0 +1,1143 @@ +import numpy as np +from collections import OrderedDict +from keras.layers import Input, Conv2D, MaxPool2D +from keras.layers import Flatten, Reshape, Activation +from keras.layers import GlobalAveragePooling2D +from keras.layers.merge import concatenate +from keras.models import Model +from .layers import L2Normalization +from .utils import make_bboxes + + +def _build_xception_basenet(network): + """ + """ + from keras import layers + from keras.layers import SeparableConv2D + from keras.layers import BatchNormalization + from keras.layers import MaxPooling2D + x = Conv2D(32, (3, 3), + strides=(2, 2), + use_bias=False, + name='block1_conv1')(network["input"]) + x = BatchNormalization(name='block1_conv1_bn')(x) + x = Activation('relu', name='block1_conv1_act')(x) + x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) + x = BatchNormalization(name='block1_conv2_bn')(x) + x = Activation('relu', name='block1_conv2_act')(x) + + residual = Conv2D(128, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) + residual = BatchNormalization()(residual) + + x = SeparableConv2D(128, (3, 3), + padding='same', + use_bias=False, + name='block2_sepconv1')(x) + x = BatchNormalization(name='block2_sepconv1_bn')(x) + x = Activation('relu', name='block2_sepconv2_act')(x) + x = SeparableConv2D(128, (3, 3), + padding='same', + use_bias=False, + name='block2_sepconv2')(x) + x = BatchNormalization(name='block2_sepconv2_bn')(x) + + x = MaxPooling2D((3, 3), + strides=(2, 2), + 
padding='same', + name='block2_pool')(x) + x = layers.add([x, residual]) + + residual = Conv2D(256, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) + residual = BatchNormalization()(residual) + + x = Activation('relu', name='block3_sepconv1_act')(x) + x = SeparableConv2D(256, (3, 3), + padding='same', + use_bias=False, + name='block3_sepconv1')(x) + x = BatchNormalization(name='block3_sepconv1_bn')(x) + x = Activation('relu', name='block3_sepconv2_act')(x) + x = SeparableConv2D(256, (3, 3), + padding='same', + use_bias=False, + name='block3_sepconv2')(x) + x = BatchNormalization(name='block3_sepconv2_bn')(x) + + x = MaxPooling2D((3, 3), + strides=(2, 2), + padding='same', + name='block3_pool')(x) + x = layers.add([x, residual]) + + residual = Conv2D(728, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) + residual = BatchNormalization()(residual) + + x = Activation('relu', name='block4_sepconv1_act')(x) + x = SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name='block4_sepconv1')(x) + x = BatchNormalization(name='block4_sepconv1_bn')(x) + x = Activation('relu', name='block4_sepconv2_act')(x) + x = SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name='block4_sepconv2')(x) + network["block4_sepconv2"] = x + x = BatchNormalization(name='block4_sepconv2_bn')(x) + + x = MaxPooling2D((3, 3), + strides=(2, 2), + padding='same', + name='block4_pool')(x) + x = layers.add([x, residual]) + + for i in range(8): + residual = x + prefix = 'block' + str(i + 5) + + x = Activation('relu', name=prefix + '_sepconv1_act')(x) + x = SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name=prefix + '_sepconv1')(x) + x = BatchNormalization(name=prefix + '_sepconv1_bn')(x) + x = Activation('relu', name=prefix + '_sepconv2_act')(x) + x = SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name=prefix + '_sepconv2')(x) + x = BatchNormalization(name=prefix + '_sepconv2_bn')(x) + x = Activation('relu', name=prefix + '_sepconv3_act')(x) + x = SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name=prefix + '_sepconv3')(x) + x = BatchNormalization(name=prefix + '_sepconv3_bn')(x) + + x = layers.add([x, residual]) + + residual = Conv2D(1024, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) + residual = BatchNormalization()(residual) + + x = Activation('relu', name='block13_sepconv1_act')(x) + x = SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name='block13_sepconv1')(x) + x = BatchNormalization(name='block13_sepconv1_bn')(x) + x = Activation('relu', name='block13_sepconv2_act')(x) + x = SeparableConv2D(1024, (3, 3), + padding='same', + use_bias=False, + name='block13_sepconv2')(x) + network["block13_sepconv2"] = x + x = BatchNormalization(name='block13_sepconv2_bn')(x) + + x = MaxPooling2D((3, 3), + strides=(2, 2), + padding='same', + name='block13_pool')(x) + x = layers.add([x, residual]) + + x = SeparableConv2D(1536, (3, 3), + padding='same', + use_bias=False, + name='block14_sepconv1')(x) + x = BatchNormalization(name='block14_sepconv1_bn')(x) + x = Activation('relu', name='block14_sepconv1_act')(x) + + x = SeparableConv2D(2048, (3, 3), + padding='same', + use_bias=False, + name='block14_sepconv2')(x) + network["block14_sepconv2"] = x + x = BatchNormalization(name='block14_sepconv2_bn')(x) + x = Activation('relu', name='block14_sepconv2_act')(x) + network["block14_sepconv2_act"] = x + + +def _build_resnet50_basenet(network): + """ + """ + from keras.layers import Activation + from 
keras.layers import Conv2D
+    from keras.layers import MaxPooling2D
+    from keras.layers import ZeroPadding2D
+    from keras.layers import BatchNormalization
+    import keras.backend as K
+    from keras.applications.resnet50 import conv_block, identity_block
+    if K.image_data_format() == 'channels_last':
+        bn_axis = 3
+    else:
+        bn_axis = 1
+    x = ZeroPadding2D((3, 3))(network["input"])
+    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
+    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
+    x = Activation('relu')(x)
+    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+    network["conv_block2"] = conv_block(x, 3, [64, 64, 256],
+                                        stage=2, block='a', strides=(1, 1))
+    network["identity_block2_1"] = identity_block(network["conv_block2"],
+                                                  3, [64, 64, 256],
+                                                  stage=2, block='b')
+    network["identity_block2_2"] = identity_block(network["identity_block2_1"],
+                                                  3, [64, 64, 256],
+                                                  stage=2, block='c')
+
+    network["conv_block3"] = conv_block(network["identity_block2_2"],
+                                        3, [128, 128, 512],
+                                        stage=3, block='a')
+    network["identity_block3_1"] = identity_block(network["conv_block3"],
+                                                  3, [128, 128, 512],
+                                                  stage=3, block='b')
+    network["identity_block3_2"] = identity_block(network["identity_block3_1"],
+                                                  3, [128, 128, 512],
+                                                  stage=3, block='c')
+    network["identity_block3_3"] = identity_block(network["identity_block3_2"],
+                                                  3, [128, 128, 512],
+                                                  stage=3, block='d')
+
+    network["conv_block4"] = conv_block(network["identity_block3_3"],
+                                        3, [256, 256, 1024],
+                                        stage=4, block='a')
+    network["identity_block4_1"] = identity_block(network["conv_block4"],
+                                                  3, [256, 256, 1024],
+                                                  stage=4, block='b')
+    network["identity_block4_2"] = identity_block(network["identity_block4_1"],
+                                                  3, [256, 256, 1024],
+                                                  stage=4, block='c')
+    network["identity_block4_3"] = identity_block(network["identity_block4_2"],
+                                                  3, [256, 256, 1024],
+                                                  stage=4, block='d')
+    network["identity_block4_4"] = identity_block(network["identity_block4_3"],
+                                                  3, [256, 256, 1024],
+                                                  stage=4, block='e')
+    network["identity_block4_5"] = identity_block(network["identity_block4_4"],
+                                                  3, [256, 256, 1024],
+                                                  stage=4, block='f')
+
+    network["conv_block5"] = conv_block(network["identity_block4_5"],
+                                        3, [512, 512, 2048],
+                                        stage=5, block='a')
+    network["identity_block5_1"] = identity_block(network["conv_block5"],
+                                                  3, [512, 512, 2048],
+                                                  stage=5, block='b')
+    network["identity_block5_2"] = identity_block(network["identity_block5_1"],
+                                                  3, [512, 512, 2048],
+                                                  stage=5, block='c')
+
+
+def _build_vgg16_basenet(network):
+    """
+    """
+    # convolution layer 1
+    network["block1_conv1"] = Conv2D(64, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block1_conv1"
+                                     )(network["input"])
+    network["block1_conv2"] = Conv2D(64, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block1_conv2"
+                                     )(network["block1_conv1"])
+    network["block1_pool"] = MaxPool2D((2, 2),
+                                       strides=(2, 2),
+                                       padding="same",
+                                       name="block1_pool"
+                                       )(network["block1_conv2"])
+    # convolution layer 2
+    network["block2_conv1"] = Conv2D(128, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block2_conv1"
+                                     )(network["block1_pool"])
+    network["block2_conv2"] = Conv2D(128, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block2_conv2"
+                                     )(network["block2_conv1"])
+    network["block2_pool"] = MaxPool2D((2, 2),
+                                       strides=(2, 2),
+                                       padding="same",
+                                       name="block2_pool"
+                                       )(network["block2_conv2"])
+    # convolution layer 3
+    network["block3_conv1"] = Conv2D(256, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block3_conv1"
+                                     )(network["block2_pool"])
+    network["block3_conv2"] = Conv2D(256, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block3_conv2"
+                                     )(network["block3_conv1"])
+    network["block3_conv3"] = Conv2D(256, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block3_conv3"
+                                     )(network["block3_conv2"])
+    network["block3_pool"] = MaxPool2D((2, 2),
+                                       strides=(2, 2),
+                                       padding="same",
+                                       name="block3_pool"
+                                       )(network["block3_conv3"])
+    # convolution layer 4
+    network["block4_conv1"] = Conv2D(512, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block4_conv1"
+                                     )(network["block3_pool"])
+    network["block4_conv2"] = Conv2D(512, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block4_conv2"
+                                     )(network["block4_conv1"])
+    network["block4_conv3"] = Conv2D(512, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block4_conv3"
+                                     )(network["block4_conv2"])
+    network["block4_pool"] = MaxPool2D((2, 2),
+                                       strides=(2, 2),
+                                       padding="same",
+                                       name="block4_pool"
+                                       )(network["block4_conv3"])
+    # convolution layer 5
+    network["block5_conv1"] = Conv2D(512, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block5_conv1"
+                                     )(network["block4_pool"])
+    network["block5_conv2"] = Conv2D(512, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block5_conv2"
+                                     )(network["block5_conv1"])
+    network["block5_conv3"] = Conv2D(512, (3, 3),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block5_conv3"
+                                     )(network["block5_conv2"])
+    network["block5_pool"] = MaxPool2D((3, 3),
+                                       strides=(1, 1),
+                                       padding="same",
+                                       name="block5_pool"
+                                       )(network["block5_conv3"])
+
+
+def SSD300_vgg16(input_shape, n_classes, aspect_ratios, scales):
+    """
+    """
+    network = OrderedDict()
+    network["input"] = Input(shape=input_shape)
+
+    # base network -------------------------------------------------------
+    _build_vgg16_basenet(network)
+    # block4 l2 normalization
+    network["block4_norm"] = L2Normalization(20,
+                                             name="block4_norm"
+                                             )(network["block4_conv3"])
+    feature_layers = ["block4_norm", "block7_conv", "block8_conv2",
+                      "block9_conv2", "block10_conv2", "block11_conv2"]
+    base_last = network["block5_pool"]
+
+    # convolution layer 6 (fc6)
+    network["block6_conv"] = Conv2D(1024, (3, 3),
+                                    dilation_rate=(6, 6),
+                                    activation="relu",
+                                    padding="same",
+                                    name="block6_conv"
+                                    )(base_last)
+    # convolution layer 7 (fc7)
+    network["block7_conv"] = Conv2D(1024, (1, 1),
+                                    activation="relu",
+                                    padding="same",
+                                    name="block7_conv"
+                                    )(network["block6_conv"])
+    # extra feature layer ---------------------------------------------
+    # convolution layer 8
+    network["block8_conv1"] = Conv2D(256, (1, 1),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block8_conv1"
+                                     )(network["block7_conv"])
+    network["block8_conv2"] = Conv2D(512, (3, 3),
+                                     strides=(2, 2),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block8_conv2"
+                                     )(network["block8_conv1"])
+    # convolution layer 9
+    network["block9_conv1"] = Conv2D(128, (1, 1),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block9_conv1"
+                                     )(network["block8_conv2"])
+    network["block9_conv2"] = Conv2D(256, (3, 3),
+                                     strides=(2, 2),
+                                     activation="relu",
+                                     padding="same",
+                                     name="block9_conv2"
+                                     )(network["block9_conv1"])
+    # convolution layer 10
+    network["block10_conv1"] = Conv2D(128, (1, 1),
+                                      activation="relu",
+                                      padding="same",
+                                      name="block10_conv1"
+                                      )(network["block9_conv2"])
+    network["block10_conv2"] = Conv2D(256, (3, 3),
+                                      strides=(1, 1),
+                                      activation="relu",
+                                      padding="valid",
+                                      name="block10_conv2"
+                                      )(network["block10_conv1"])
+    # convolution layer 11
+    network["block11_conv1"] = Conv2D(128, (1, 1),
+                                      activation="relu",
+                                      padding="same",
+                                      
name="block11_conv1" + )(network["block10_conv2"]) + network["block11_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block11_conv2" + )(network["block11_conv1"]) + # extra feature layer -------------------------------------------- + + # classifier ----------------------------------------------------- + bboxes = list() + list_loc_layers = list() + list_conf_layers = list() + for i in range(len(feature_layers)): + # make boundary boxes + layer_name = feature_layers[i] + aspect_ratio = aspect_ratios[i] + scale = scales[i] + feature_map_shape = network[layer_name]._keras_shape + bbox = make_bboxes(input_shape, + feature_map_shape, + aspect_ratio, + scale) + bboxes.append(bbox) + n_boxes = int(len(bbox)/feature_map_shape[1]/feature_map_shape[2]) + + # make classifier layers + layer_name_n = layer_name + "_{}".format(n_classes) + network[layer_name_n+"_loc"] = Conv2D(n_boxes * 4, (3, 3), + padding="same", + name=layer_name_n+"_loc" + )(network[layer_name]) + network[layer_name_n+"_loc_flat"] = Flatten( + name=layer_name_n+"_loc_flat")(network[layer_name_n+"_loc"]) + network[layer_name_n+"_conf"] = Conv2D(n_boxes * n_classes, (3, 3), + padding="same", + name=layer_name_n+"_conf" + )(network[layer_name]) + network[layer_name_n+"_conf_flat"] = Flatten( + name=layer_name_n+"_conf_flat")(network[layer_name_n+"_conf"]) + + list_loc_layers.append(network[layer_name_n+"_loc_flat"]) + list_conf_layers.append(network[layer_name_n+"_conf_flat"]) + # classifier ----------------------------------------------------- + + # collect predictions + network["loc"] = concatenate(list_loc_layers, + axis=1, + name="loc") + network["conf"] = concatenate(list_conf_layers, + axis=1, + name="conf") + n_all_boxes = network["loc"]._keras_shape[-1] // 4 + network["predictions_loc"] = Reshape((n_all_boxes, 4), + name="predictions_loc" + )(network["loc"]) + reshaped_conf = Reshape((n_all_boxes, n_classes), + name="reshaped_conf" + )(network["conf"]) + network["predictions_conf"] = Activation("softmax", + name="predictions_conf" + )(reshaped_conf) + network["predictions"] = concatenate([network["predictions_loc"], + network["predictions_conf"]], + axis=2, + name="predictions") + # model + model = Model(network["input"], network["predictions"]) + # bbox + all_bboxes = np.concatenate(bboxes, axis=0).astype("float32") + return model, all_bboxes + + +def SSD512_vgg16(input_shape, n_classes, aspect_ratios, scales): + """ + """ + network = OrderedDict() + network["input"] = Input(shape=input_shape) + + # base network ------------------------------------------------------- + _build_vgg16_basenet(network) + # block4 l2 normalization + network["block4_norm"] = L2Normalization(20, + name="block4_norm" + )(network["block4_conv3"]) + feature_layers = ["block4_norm", "block7_conv", "block8_conv2", + "block9_conv2", "block10_conv2", "block11_conv2", + "block12_conv2"] + base_last = network["block5_pool"] + + # convolution layer 6 (fc6) + network["block6_conv"] = Conv2D(1024, (3, 3), + dilation_rate=(6, 6), + activation="relu", + padding="same", + name="block6_conv" + )(base_last) + # convolution layer 7 (fc7) + network["block7_conv"] = Conv2D(1024, (1, 1), + activation="relu", + padding="same", + name="block7_conv" + )(network["block6_conv"]) + + # extra feature layer --------------------------------------------- + # convolution layer 8 + network["block8_conv1"] = Conv2D(256, (1, 1), + activation="relu", + padding="same", + name="block8_conv1" + )(network["block7_conv"]) + 
network["block8_conv2"] = Conv2D(512, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block8_conv2" + )(network["block8_conv1"]) + # convolution layer 9 + network["block9_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block9_conv1" + )(network["block8_conv2"]) + network["block9_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block9_conv2" + )(network["block9_conv1"]) + # convolution layer 10 + network["block10_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block10_conv1" + )(network["block9_conv2"]) + network["block10_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block10_conv2" + )(network["block10_conv1"]) + # convolution layer 11 + network["block11_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block11_conv1" + )(network["block10_conv2"]) + network["block11_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block11_conv2" + )(network["block11_conv1"]) + # convolution layer 12 + network["block12_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block12_conv1" + )(network["block11_conv2"]) + network["block12_conv2"] = Conv2D(256, (2, 2), + strides=(1, 1), + activation="relu", + padding="valid", + name="block12_conv2" + )(network["block12_conv1"]) + # extra feature layer -------------------------------------------- + + # classifier ----------------------------------------------------- + bboxes = list() + list_loc_layers = list() + list_conf_layers = list() + for i in range(len(feature_layers)): + # make boundary boxes + layer_name = feature_layers[i] + aspect_ratio = aspect_ratios[i] + scale = scales[i] + feature_map_shape = network[layer_name]._keras_shape + bbox = make_bboxes(input_shape, + feature_map_shape, + aspect_ratio, + scale) + bboxes.append(bbox) + n_boxes = int(len(bbox)/feature_map_shape[1]/feature_map_shape[2]) + + # make classifier layers + layer_name_n = layer_name + "_{}".format(n_classes) + network[layer_name_n+"_loc"] = Conv2D(n_boxes * 4, (3, 3), + padding="same", + name=layer_name_n+"_loc" + )(network[layer_name]) + network[layer_name_n+"_loc_flat"] = Flatten( + name=layer_name_n+"_loc_flat")(network[layer_name_n+"_loc"]) + network[layer_name_n+"_conf"] = Conv2D(n_boxes * n_classes, (3, 3), + padding="same", + name=layer_name_n+"_conf" + )(network[layer_name]) + network[layer_name_n+"_conf_flat"] = Flatten( + name=layer_name_n+"_conf_flat")(network[layer_name_n+"_conf"]) + + list_loc_layers.append(network[layer_name_n+"_loc_flat"]) + list_conf_layers.append(network[layer_name_n+"_conf_flat"]) + # classifier ----------------------------------------------------- + + # collect predictions + network["loc"] = concatenate(list_loc_layers, + axis=1, + name="loc") + network["conf"] = concatenate(list_conf_layers, + axis=1, + name="conf") + n_all_boxes = network["loc"]._keras_shape[-1] // 4 + network["predictions_loc"] = Reshape((n_all_boxes, 4), + name="predictions_loc" + )(network["loc"]) + reshaped_conf = Reshape((n_all_boxes, n_classes), + name="reshaped_conf" + )(network["conf"]) + network["predictions_conf"] = Activation("softmax", + name="predictions_conf" + )(reshaped_conf) + network["predictions"] = concatenate([network["predictions_loc"], + network["predictions_conf"]], + axis=2, + name="predictions") + # model + model = Model(network["input"], network["predictions"]) + # bbox + all_bboxes = 
np.concatenate(bboxes, axis=0).astype("float32") + + return model, all_bboxes + + +def SSD300_resnet50(input_shape, n_classes, aspect_ratios, scales): + """ + """ + network = OrderedDict() + network["input"] = Input(shape=input_shape) + + # base network ------------------------------------------------------- + _build_resnet50_basenet(network) + # block3 l2 normalization + network["identity_block3_3_norm"] = L2Normalization( + 20, name="identity_block3_3_norm")(network["identity_block3_3"]) + + feature_layers = ["identity_block3_3_norm", "identity_block4_5", + "identity_block5_2", + "block9_conv2", "block10_conv2", "block11_conv2"] + base_last = network["identity_block5_2"] + + # convolution layer 9 + network["block9_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block9_conv1" + )(base_last) + network["block9_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block9_conv2" + )(network["block9_conv1"]) + # convolution layer 10 + network["block10_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block10_conv1" + )(network["block9_conv2"]) + network["block10_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block10_conv2" + )(network["block10_conv1"]) + # convolution layer 11 + network["block11_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block11_conv1" + )(network["block10_conv2"]) + network["block11_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block11_conv2" + )(network["block11_conv1"]) + # extra feature layer -------------------------------------------- + + # classifier ----------------------------------------------------- + bboxes = list() + list_loc_layers = list() + list_conf_layers = list() + for i in range(len(feature_layers)): + # make boundary boxes + layer_name = feature_layers[i] + aspect_ratio = aspect_ratios[i] + scale = scales[i] + feature_map_shape = network[layer_name]._keras_shape + bbox = make_bboxes(input_shape, + feature_map_shape, + aspect_ratio, + scale) + bboxes.append(bbox) + n_boxes = int(len(bbox)/feature_map_shape[1]/feature_map_shape[2]) + + # make classifier layers + layer_name_n = layer_name + "_{}".format(n_classes) + network[layer_name_n+"_loc"] = Conv2D(n_boxes * 4, (3, 3), + padding="same", + name=layer_name_n+"_loc" + )(network[layer_name]) + network[layer_name_n+"_loc_flat"] = Flatten( + name=layer_name_n+"_loc_flat")(network[layer_name_n+"_loc"]) + network[layer_name_n+"_conf"] = Conv2D(n_boxes * n_classes, (3, 3), + padding="same", + name=layer_name_n+"_conf" + )(network[layer_name]) + network[layer_name_n+"_conf_flat"] = Flatten( + name=layer_name_n+"_conf_flat")(network[layer_name_n+"_conf"]) + + list_loc_layers.append(network[layer_name_n+"_loc_flat"]) + list_conf_layers.append(network[layer_name_n+"_conf_flat"]) + # classifier ----------------------------------------------------- + + # collect predictions + network["loc"] = concatenate(list_loc_layers, + axis=1, + name="loc") + network["conf"] = concatenate(list_conf_layers, + axis=1, + name="conf") + n_all_boxes = network["loc"]._keras_shape[-1] // 4 + network["predictions_loc"] = Reshape((n_all_boxes, 4), + name="predictions_loc" + )(network["loc"]) + reshaped_conf = Reshape((n_all_boxes, n_classes), + name="reshaped_conf" + )(network["conf"]) + network["predictions_conf"] = Activation("softmax", + name="predictions_conf" + )(reshaped_conf) + network["predictions"] = 
concatenate([network["predictions_loc"], + network["predictions_conf"]], + axis=2, + name="predictions") + # model + model = Model(network["input"], network["predictions"]) + # bbox + all_bboxes = np.concatenate(bboxes, axis=0).astype("float32") + + return model, all_bboxes + + +def SSD512_resnet50(input_shape, n_classes, aspect_ratios, scales): + """ + """ + network = OrderedDict() + network["input"] = Input(shape=input_shape) + + # base network ------------------------------------------------------- + _build_resnet50_basenet(network) + # l2 normalization + network["identity_block3_3_norm"] = L2Normalization( + 20, name="identity_block3_3_norm")(network["identity_block3_3"]) + + feature_layers = ["identity_block3_3_norm", "identity_block4_5", + "identity_block5_2", + "block9_conv2", "block10_conv2", "block11_conv2", + "block12_conv2"] + base_last = network["identity_block5_2"] + + # convolution layer 9 + network["block9_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block9_conv1" + )(base_last) + network["block9_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block9_conv2" + )(network["block9_conv1"]) + # convolution layer 10 + network["block10_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block10_conv1" + )(network["block9_conv2"]) + network["block10_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block10_conv2" + )(network["block10_conv1"]) + # convolution layer 11 + network["block11_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block11_conv1" + )(network["block10_conv2"]) + network["block11_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block11_conv2" + )(network["block11_conv1"]) + # convolution layer 12 + network["block12_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block12_conv1" + )(network["block11_conv2"]) + network["block12_conv2"] = Conv2D(256, (2, 2), + strides=(1, 1), + activation="relu", + padding="valid", + name="block12_conv2" + )(network["block12_conv1"]) + # extra feature layer -------------------------------------------- + + # classifier ----------------------------------------------------- + bboxes = list() + list_loc_layers = list() + list_conf_layers = list() + for i in range(len(feature_layers)): + # make boundary boxes + layer_name = feature_layers[i] + aspect_ratio = aspect_ratios[i] + scale = scales[i] + feature_map_shape = network[layer_name]._keras_shape + bbox = make_bboxes(input_shape, + feature_map_shape, + aspect_ratio, + scale) + bboxes.append(bbox) + n_boxes = int(len(bbox)/feature_map_shape[1]/feature_map_shape[2]) + + # make classifier layers + layer_name_n = layer_name + "_{}".format(n_classes) + network[layer_name_n+"_loc"] = Conv2D(n_boxes * 4, (3, 3), + padding="same", + name=layer_name_n+"_loc" + )(network[layer_name]) + network[layer_name_n+"_loc_flat"] = Flatten( + name=layer_name_n+"_loc_flat")(network[layer_name_n+"_loc"]) + network[layer_name_n+"_conf"] = Conv2D(n_boxes * n_classes, (3, 3), + padding="same", + name=layer_name_n+"_conf" + )(network[layer_name]) + network[layer_name_n+"_conf_flat"] = Flatten( + name=layer_name_n+"_conf_flat")(network[layer_name_n+"_conf"]) + + list_loc_layers.append(network[layer_name_n+"_loc_flat"]) + list_conf_layers.append(network[layer_name_n+"_conf_flat"]) + # classifier ----------------------------------------------------- + + # collect 
predictions + network["loc"] = concatenate(list_loc_layers, + axis=1, + name="loc") + network["conf"] = concatenate(list_conf_layers, + axis=1, + name="conf") + n_all_boxes = network["loc"]._keras_shape[-1] // 4 + network["predictions_loc"] = Reshape((n_all_boxes, 4), + name="predictions_loc" + )(network["loc"]) + reshaped_conf = Reshape((n_all_boxes, n_classes), + name="reshaped_conf" + )(network["conf"]) + network["predictions_conf"] = Activation("softmax", + name="predictions_conf" + )(reshaped_conf) + network["predictions"] = concatenate([network["predictions_loc"], + network["predictions_conf"]], + axis=2, + name="predictions") + # model + model = Model(network["input"], network["predictions"]) + # bbox + all_bboxes = np.concatenate(bboxes, axis=0).astype("float32") + + return model, all_bboxes + + +def SSD300_xception(input_shape, n_classes, aspect_ratios, scales): + """ + """ + network = OrderedDict() + network["input"] = Input(shape=input_shape) + + # base network ------------------------------------------------------- + _build_xception_basenet(network) + # l2 normalization + network["block4_sepconv2_norm"] = L2Normalization( + 20, name="block4_sepconv2_norm")(network["block4_sepconv2"]) + + feature_layers = ["block4_sepconv2_norm", "block13_sepconv2", + "block14_sepconv2", + "block9_conv2", "block10_conv2", "block11_conv2", + ] + base_last = network["block14_sepconv2_act"] + + # convolution layer 9 + network["block9_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block9_conv1" + )(base_last) + network["block9_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block9_conv2" + )(network["block9_conv1"]) + # convolution layer 10 + network["block10_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block10_conv1" + )(network["block9_conv2"]) + network["block10_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block10_conv2" + )(network["block10_conv1"]) + # convolution layer 11 + network["block11_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block11_conv1" + )(network["block10_conv2"]) + network["block11_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block11_conv2" + )(network["block11_conv1"]) + # extra feature layer -------------------------------------------- + + # classifier ----------------------------------------------------- + bboxes = list() + list_loc_layers = list() + list_conf_layers = list() + for i in range(len(feature_layers)): + # make boundary boxes + layer_name = feature_layers[i] + aspect_ratio = aspect_ratios[i] + scale = scales[i] + feature_map_shape = network[layer_name]._keras_shape + bbox = make_bboxes(input_shape, + feature_map_shape, + aspect_ratio, + scale) + bboxes.append(bbox) + n_boxes = int(len(bbox)/feature_map_shape[1]/feature_map_shape[2]) + + # make classifier layers + layer_name_n = layer_name + "_{}".format(n_classes) + network[layer_name_n+"_loc"] = Conv2D(n_boxes * 4, (3, 3), + padding="same", + name=layer_name_n+"_loc" + )(network[layer_name]) + network[layer_name_n+"_loc_flat"] = Flatten( + name=layer_name_n+"_loc_flat")(network[layer_name_n+"_loc"]) + network[layer_name_n+"_conf"] = Conv2D(n_boxes * n_classes, (3, 3), + padding="same", + name=layer_name_n+"_conf" + )(network[layer_name]) + network[layer_name_n+"_conf_flat"] = Flatten( + name=layer_name_n+"_conf_flat")(network[layer_name_n+"_conf"]) + + 
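+        # stash this feature map's flattened loc/conf heads; all maps are concatenated into the prediction tensor below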
list_loc_layers.append(network[layer_name_n+"_loc_flat"]) + list_conf_layers.append(network[layer_name_n+"_conf_flat"]) + # classifier ----------------------------------------------------- + + # collect predictions + network["loc"] = concatenate(list_loc_layers, + axis=1, + name="loc") + network["conf"] = concatenate(list_conf_layers, + axis=1, + name="conf") + n_all_boxes = network["loc"]._keras_shape[-1] // 4 + network["predictions_loc"] = Reshape((n_all_boxes, 4), + name="predictions_loc" + )(network["loc"]) + reshaped_conf = Reshape((n_all_boxes, n_classes), + name="reshaped_conf" + )(network["conf"]) + network["predictions_conf"] = Activation("softmax", + name="predictions_conf" + )(reshaped_conf) + network["predictions"] = concatenate([network["predictions_loc"], + network["predictions_conf"]], + axis=2, + name="predictions") + # model + model = Model(network["input"], network["predictions"]) + # bbox + all_bboxes = np.concatenate(bboxes, axis=0).astype("float32") + + return model, all_bboxes + + +def SSD512_xception(input_shape, n_classes, aspect_ratios, scales): + """ + """ + network = OrderedDict() + network["input"] = Input(shape=input_shape) + + # base network ------------------------------------------------------- + _build_xception_basenet(network) + # l2 normalization + network["block4_sepconv2_norm"] = L2Normalization( + 20, name="block4_sepconv2_norm")(network["block4_sepconv2"]) + + feature_layers = ["block4_sepconv2_norm", "block13_sepconv2", + "block14_sepconv2", + "block9_conv2", "block10_conv2", "block11_conv2", + "block12_conv2"] + base_last = network["block14_sepconv2_act"] + + # convolution layer 9 + network["block9_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block9_conv1" + )(base_last) + network["block9_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block9_conv2" + )(network["block9_conv1"]) + # convolution layer 10 + network["block10_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block10_conv1" + )(network["block9_conv2"]) + network["block10_conv2"] = Conv2D(256, (3, 3), + strides=(2, 2), + activation="relu", + padding="same", + name="block10_conv2" + )(network["block10_conv1"]) + # convolution layer 11 + network["block11_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block11_conv1" + )(network["block10_conv2"]) + network["block11_conv2"] = Conv2D(256, (3, 3), + strides=(1, 1), + activation="relu", + padding="valid", + name="block11_conv2" + )(network["block11_conv1"]) + # convolution layer 12 + network["block12_conv1"] = Conv2D(128, (1, 1), + activation="relu", + padding="same", + name="block12_conv1" + )(network["block11_conv2"]) + network["block12_conv2"] = Conv2D(256, (2, 2), + strides=(1, 1), + activation="relu", + padding="valid", + name="block12_conv2" + )(network["block12_conv1"]) + # extra feature layer -------------------------------------------- + + # classifier ----------------------------------------------------- + bboxes = list() + list_loc_layers = list() + list_conf_layers = list() + for i in range(len(feature_layers)): + # make boundary boxes + layer_name = feature_layers[i] + aspect_ratio = aspect_ratios[i] + scale = scales[i] + feature_map_shape = network[layer_name]._keras_shape + bbox = make_bboxes(input_shape, + feature_map_shape, + aspect_ratio, + scale) + bboxes.append(bbox) + n_boxes = int(len(bbox)/feature_map_shape[1]/feature_map_shape[2]) + + # make classifier layers + layer_name_n = layer_name + 
"_{}".format(n_classes) + network[layer_name_n+"_loc"] = Conv2D(n_boxes * 4, (3, 3), + padding="same", + name=layer_name_n+"_loc" + )(network[layer_name]) + network[layer_name_n+"_loc_flat"] = Flatten( + name=layer_name_n+"_loc_flat")(network[layer_name_n+"_loc"]) + network[layer_name_n+"_conf"] = Conv2D(n_boxes * n_classes, (3, 3), + padding="same", + name=layer_name_n+"_conf" + )(network[layer_name]) + network[layer_name_n+"_conf_flat"] = Flatten( + name=layer_name_n+"_conf_flat")(network[layer_name_n+"_conf"]) + + list_loc_layers.append(network[layer_name_n+"_loc_flat"]) + list_conf_layers.append(network[layer_name_n+"_conf_flat"]) + # classifier ----------------------------------------------------- + + # collect predictions + network["loc"] = concatenate(list_loc_layers, + axis=1, + name="loc") + network["conf"] = concatenate(list_conf_layers, + axis=1, + name="conf") + n_all_boxes = network["loc"]._keras_shape[-1] // 4 + network["predictions_loc"] = Reshape((n_all_boxes, 4), + name="predictions_loc" + )(network["loc"]) + reshaped_conf = Reshape((n_all_boxes, n_classes), + name="reshaped_conf" + )(network["conf"]) + network["predictions_conf"] = Activation("softmax", + name="predictions_conf" + )(reshaped_conf) + network["predictions"] = concatenate([network["predictions_loc"], + network["predictions_conf"]], + axis=2, + name="predictions") + # model + model = Model(network["input"], network["predictions"]) + # bbox + all_bboxes = np.concatenate(bboxes, axis=0).astype("float32") + + return model, all_bboxes diff --git a/mahjong_sample_web_app/detector/ssd/ssd.py b/mahjong_sample_web_app/detector/ssd/ssd.py new file mode 100755 index 0000000..c7874d2 --- /dev/null +++ b/mahjong_sample_web_app/detector/ssd/ssd.py @@ -0,0 +1,308 @@ +import json +import keras +from .models import SSD300_vgg16, SSD512_vgg16 +from .models import SSD300_resnet50, SSD512_resnet50 +from .models import SSD300_xception +from .losses import MultiBoxLoss +from .utils import BoundaryBox + + +class SingleShotMultiBoxDetector: + """ + """ + + available_type = ["ssd300", "ssd512"] + available_net = ["vgg16", "resnet50", "xception"] + + ar_presets = dict( + ssd300=[ + [2.0, 1 / 2.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0], + [2.0, 1 / 2.0], + ], + ssd512=[ + [2.0, 1 / 2.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0, 3.0, 1 / 3.0], + [2.0, 1 / 2.0], + [2.0, 1 / 2.0], + ], + ) + scale_presets = dict( + ssd300=[ + (30.0, 60.0), + (60.0, 111.0), + (111.0, 162.0), + (162.0, 213.0), + (213.0, 264.0), + (264.0, 315.0), + ], + ssd512=[ + (20.48, 51.2), + (51.2, 133.12), + (133.12, 215.04), + (215.04, 296.96), + (296.96, 378.88), + (378.88, 460.8), + (460.8, 542.72), + ], + ) + default_shapes = dict(ssd300=(300, 300, 3)) + + def __init__( + self, + n_classes=1, + class_names=["bg"], + input_shape=None, + aspect_ratios=None, + scales=None, + variances=None, + overlap_threshold=0.5, + nms_threshold=0.45, + max_output_size=400, + model_type="ssd300", + base_net="vgg16", + ): + """ + """ + self.n_classes = n_classes + self.class_names = class_names + if "bg" != class_names[0]: + print("Warning: Fist label should be bg." 
" It'll be added automatically.") + self.class_names = ["bg"] + class_names + self.n_classes += 1 + if input_shape: + self.input_shape = input_shape + else: + self.input_shape = self.default_shapes[model_type] + if aspect_ratios: + self.aspect_ratios = aspect_ratios + else: + self.aspect_ratios = self.ar_presets[model_type] + if scales: + self.scales = scales + else: + self.scales = self.scale_presets[model_type] + if variances: + self.variances = variances + else: + self.variances = [0.1, 0.1, 0.2, 0.2] + self.overlap_threshold = overlap_threshold + self.nms_threshold = nms_threshold + self.max_output_size = max_output_size + self.model_type = model_type + self.base_net = base_net + self.preprocesser = None + if base_net == "vgg16": + from keras.applications.vgg16 import preprocess_input + elif base_net == "resnet50": + from keras.applications.resnet50 import preprocess_input + elif base_net == "xception": + from keras.applications.xception import preprocess_input + else: + raise TypeError("Unknown base net name.") + self.preprocesser = preprocess_input + + self.model = None + self.bboxes = None + + def build(self, init_weight="keras_imagenet"): + """ + """ + # create network + if self.model_type == "ssd300" and self.base_net == "vgg16": + self.model, priors = SSD300_vgg16( + self.input_shape, self.n_classes, self.aspect_ratios, self.scales + ) + elif self.model_type == "ssd300" and self.base_net == "resnet50": + self.model, priors = SSD300_resnet50( + self.input_shape, self.n_classes, self.aspect_ratios, self.scales + ) + elif self.model_type == "ssd300" and self.base_net == "xception": + self.model, priors = SSD300_xception( + self.input_shape, self.n_classes, self.aspect_ratios, self.scales + ) + elif self.model_type == "ssd512" and self.base_net == "vgg16": + self.model, priors = SSD512_vgg16( + self.input_shape, self.n_classes, self.aspect_ratios, self.scales + ) + elif self.model_type == "ssd512" and self.base_net == "resnet50": + self.model, priors = SSD512_resnet50( + self.input_shape, self.n_classes, self.aspect_ratios, self.scales + ) + else: + raise NameError( + "{},{} is not defined. types are {}, basenets are {}.".format( + self.model_type, + self.base_net, + self.available_type, + self.available_net, + ) + ) + + if init_weight is None: + print("Network has not initialized with any pretrained models.") + elif init_weight == "keras_imagenet": + print( + "Initializing network with keras application model" + " pretrained imagenet." 
+ ) + if self.base_net == "vgg16": + import keras.applications.vgg16 as keras_vgg16 + + weights_path = keras_vgg16.get_file( + "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5", + keras_vgg16.WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + ) + elif self.base_net == "resnet50": + import keras.applications.resnet50 as keras_resnet50 + + weights_path = keras_resnet50.get_file( + "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5", + keras_resnet50.WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + ) + elif self.base_net == "xception": + import keras.applications.xception as keras_xception + + weights_path = keras_xception.get_file( + "xception_weights_tf_dim_ordering_tf_kernels_notop.h5", + keras_xception.TF_WEIGHTS_PATH_NO_TOP, + cache_subdir="models", + ) + else: + raise NameError("{} is not defined.".format(self.base_net)) + self.model.load_weights(weights_path, by_name=True) + else: + print("Initializing network from file {}.".format(init_weight)) + self.model.load_weights(init_weight, by_name=True) + + # make boundary box class + self.bboxes = BoundaryBox( + n_classes=self.n_classes, + default_boxes=priors, + variances=self.variances, + overlap_threshold=self.overlap_threshold, + nms_threshold=self.nms_threshold, + max_output_size=self.max_output_size, + ) + + def train_by_generator( + self, + gen, + epoch=30, + neg_pos_ratio=3.0, + learning_rate=1e-3, + freeze=None, + checkpoints=None, + optimizer=None, + ): + """ + """ + # set freeze layers + if freeze is None: + freeze = list() + + for L in self.model.layers: + if L.name in freeze: + L.trainable = False + + # train setup + callbacks = list() + if checkpoints: + callbacks.append( + keras.callbacks.ModelCheckpoint( + checkpoints, verbose=1, save_weights_only=True + ) + ) + + def schedule(epoch, decay=0.9): + return learning_rate * decay ** (epoch) + + callbacks.append(keras.callbacks.LearningRateScheduler(schedule)) + + if optimizer is None: + optim = keras.optimizers.Adam(lr=learning_rate) + # optim = keras.optimizers.SGD( + # lr=learning_rate, momentum=0.9, decay=0.0005, nesterov=True + # ) + else: + optim = optimizer + + self.model.compile( + optimizer=optim, + loss=MultiBoxLoss(self.n_classes, neg_pos_ratio=neg_pos_ratio).compute_loss, + ) + history = self.model.fit_generator( + gen.generate(self.preprocesser, True), + int(gen.train_batches / gen.batch_size), + epochs=epoch, + verbose=1, + callbacks=callbacks, + validation_data=gen.generate(self.preprocesser, False), + validation_steps=int(gen.val_batches / gen.batch_size), + workers=1, + ) + + return history + + def save_parameters(self, filepath="./param.json"): + """ + """ + params = dict( + n_classes=self.n_classes, + class_names=self.class_names, + input_shape=self.input_shape, + model_type=self.model_type, + base_net=self.base_net, + aspect_ratios=self.aspect_ratios, + scales=self.scales, + variances=self.variances, + ) + print("Writing parameters into {}.".format(filepath)) + json.dump(params, open(filepath, "w"), indent=4, sort_keys=True) + + def load_parameters(self, filepath): + """ + """ + print("Loading parameters from {}.".format(filepath)) + params = json.load(open(filepath, "r")) + self.n_classes = params["n_classes"] + self.class_names = params["class_names"] + self.input_shape = params["input_shape"] + self.model_type = params["model_type"] + self.base_net = params["base_net"] + self.aspect_ratios = params["aspect_ratios"] + self.scales = params["scales"] + self.variances = params["variances"] + + def detect( + self, + X, + batch_size=1, + verbose=0, + 
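+        # keep_top_k: cap on boxes kept per image by detection_out below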
keep_top_k=200, + confidence_threshold=0.01, + do_preprocess=True, + ): + """ + """ + if do_preprocess: + inputs = self.preprocesser(X.copy()) + else: + inputs = X.copy() + + predictions = self.model.predict(inputs, batch_size=batch_size, verbose=verbose) + detections = self.bboxes.detection_out( + predictions, + keep_top_k=keep_top_k, + confidence_threshold=confidence_threshold, + ) + + return detections diff --git a/mahjong_sample_web_app/detector/ssd/utils.py b/mahjong_sample_web_app/detector/ssd/utils.py new file mode 100755 index 0000000..f8531c7 --- /dev/null +++ b/mahjong_sample_web_app/detector/ssd/utils.py @@ -0,0 +1,290 @@ +import os +import tensorflow as tf +import numpy as np +from xml.etree import ElementTree + + +def make_bboxes(input_shape, feature_map_shape, + aspect_ratios, scale): + """ + """ + map_w = feature_map_shape[1] + map_h = feature_map_shape[2] + input_w = input_shape[0] + input_h = input_shape[1] + + # local box's sizes + min_size = scale[0] + box_w = [min_size] + box_h = [min_size] + if len(scale) == 2: + box_w.append(np.sqrt(min_size * scale[1])) + box_h.append(np.sqrt(min_size * scale[1])) + for ar in aspect_ratios: + box_w.append(min_size * np.sqrt(ar)) + box_h.append(min_size / np.sqrt(ar)) + box_w = np.array(box_w)/2/input_w + box_h = np.array(box_h)/2/input_h + + # feature grids + step_w = input_w / map_w + step_h = input_h / map_h + center_h, center_w = np.mgrid[0:map_w, 0:map_h] + 0.5 + # swap h and w due to after reshapes + center_w = (center_w * step_w/input_w).reshape(-1, 1) + center_h = (center_h * step_h/input_h).reshape(-1, 1) + + n_local_box = len(box_w) + bboxes = np.concatenate((center_w, center_h), axis=1) + bboxes = np.tile(bboxes, (1, 2 * n_local_box)) + bboxes[:, ::4] -= box_w + bboxes[:, 1::4] -= box_h + bboxes[:, 2::4] += box_w + bboxes[:, 3::4] += box_h + bboxes = bboxes.reshape(-1, 4) + bboxes = np.minimum(np.maximum(bboxes, 0.0), 1.0) + + return bboxes + + +class BoundaryBox: + """ + """ + def __init__(self, n_classes, default_boxes, variances, + overlap_threshold=0.5, nms_threshold=0.45, + max_output_size=400): + self.n_classes = n_classes + self.default_boxes = default_boxes + self.variances = np.array(variances) + self.overlap_threshold = overlap_threshold + self.nms_threshold = nms_threshold + self.max_output_size = max_output_size + + self.n_boxes = 0 if default_boxes is None else len(default_boxes) + self.boxes = tf.placeholder(dtype='float32', shape=(None, 4)) + self.scores = tf.placeholder(dtype='float32', shape=(None,)) + self.nms = tf.image.non_max_suppression( + self.boxes, self.scores, + self.max_output_size, + iou_threshold=self.nms_threshold + ) + self.sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0})) + + def iou(self, box): + """Compute intersection over union for the box with all default_boxes. + + # Arguments + box: Box, numpy tensor of shape (4,). + + # Return + iou: Intersection over union, + numpy tensor of shape (num_default_boxes). 
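+            For example, a ground truth box identical to one of the
+            default boxes scores 1.0 at that entry, and 0.0 for any
+            default box it does not overlap.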
+ """ + # compute intersection + inter_upleft = np.maximum(self.default_boxes[:, :2], box[:2]) + inter_botright = np.minimum(self.default_boxes[:, 2:4], box[2:]) + inter_wh = inter_botright - inter_upleft + inter_wh = np.maximum(inter_wh, 0) + inter = inter_wh[:, 0] * inter_wh[:, 1] + # compute union + area_pred = (box[2] - box[0]) * (box[3] - box[1]) + area_gt = (self.default_boxes[:, 2] - self.default_boxes[:, 0]) + area_gt *= (self.default_boxes[:, 3] - self.default_boxes[:, 1]) + union = area_pred + area_gt - inter + # compute iou + iou = inter / union + return iou + + def encode(self, box, return_iou=True): + """Encode ground truth box into default boxes for training. + Args: + box: Box, numpy tensor of shape (4,). + return_iou: Whether to concat iou to encoded values. + + Returns: + encoded_box: Tensor with encoded box + numpy tensor of shape (n_boxes, 4 + int(return_iou)). + """ + iou = self.iou(box) + encoded_box = np.zeros((self.n_boxes, 4 + return_iou)) + assign_mask = iou > self.overlap_threshold + if not assign_mask.any(): + assign_mask[iou.argmax()] = True + if return_iou: + encoded_box[:, -1][assign_mask] = iou[assign_mask] + assigned_default_boxes = self.default_boxes[assign_mask] + box_center = 0.5 * (box[:2] + box[2:]) + box_wh = box[2:] - box[:2] + assigned_default_boxes_center = 0.5 * (assigned_default_boxes[:, :2] + + assigned_default_boxes[:, 2:4]) + assigned_default_boxes_wh = (assigned_default_boxes[:, 2:4] - + assigned_default_boxes[:, :2]) + # we encode variance + encoded_box[:, :2][assign_mask] = \ + box_center - assigned_default_boxes_center + encoded_box[:, :2][assign_mask] /= assigned_default_boxes_wh + encoded_box[:, :2][assign_mask] /= np.array([self.variances[:2]]) + encoded_box[:, 2:4][assign_mask] = np.log(box_wh / + assigned_default_boxes_wh) + encoded_box[:, 2:4][assign_mask] /= np.array([self.variances[2:]]) + return encoded_box + + def assign_boxes(self, boxes): + """ Assign boxes into default box for training. + + Args: + boxes: Box, numpy tensor of shape (n_boxes, 4 + n_classes), + n_classes is NOT including background. + + Returns: + assignment: Tensor with assigned boxes, + numpy tensor of shape (n_boxes, 4 + n_classes), + n_classes in inlucding background. + assigment[4] indicates BG confidence. + """ + assignment = np.zeros((self.n_boxes, 4 + self.n_classes)) + assignment[:, 4] = 1.0 + if len(boxes) == 0: + return assignment + encoded_boxes = np.apply_along_axis(self.encode, 1, boxes[:, :4]) + best_iou = encoded_boxes[:, :, -1].max(axis=0) + best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0) + best_iou_mask = best_iou > 0 + best_iou_idx = best_iou_idx[best_iou_mask] + assign_num = len(best_iou_idx) + encoded_boxes = encoded_boxes[:, best_iou_mask, :] + assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, + np.arange(assign_num), + :4] + assignment[:, 4][best_iou_mask] = 0 + assignment[:, 5:][best_iou_mask] = boxes[best_iou_idx, 4:] + return assignment + + def decode(self, mbox_loc): + """Convert bboxes from local predictions to shifted priors. + + # Arguments + mbox_loc: Numpy array of predicted locations. + + # Return + decode_bbox: Shifted priors. 
+ """ + prior_width = self.default_boxes[:, 2] - self.default_boxes[:, 0] + prior_height = self.default_boxes[:, 3] - self.default_boxes[:, 1] + prior_center_x = \ + 0.5 * (self.default_boxes[:, 2] + self.default_boxes[:, 0]) + prior_center_y = \ + 0.5 * (self.default_boxes[:, 3] + self.default_boxes[:, 1]) + decode_bbox_center_x = mbox_loc[:, 0] * prior_width * self.variances[0] + decode_bbox_center_x += prior_center_x + decode_bbox_center_y = mbox_loc[:, 1] * prior_width * self.variances[1] + decode_bbox_center_y += prior_center_y + decode_bbox_width = np.exp(mbox_loc[:, 2] * self.variances[2]) + decode_bbox_width *= prior_width + decode_bbox_height = np.exp(mbox_loc[:, 3] * self.variances[3]) + decode_bbox_height *= prior_height + decode_bbox_xmin = decode_bbox_center_x - 0.5 * decode_bbox_width + decode_bbox_ymin = decode_bbox_center_y - 0.5 * decode_bbox_height + decode_bbox_xmax = decode_bbox_center_x + 0.5 * decode_bbox_width + decode_bbox_ymax = decode_bbox_center_y + 0.5 * decode_bbox_height + decode_bbox = np.concatenate((decode_bbox_xmin[:, None], + decode_bbox_ymin[:, None], + decode_bbox_xmax[:, None], + decode_bbox_ymax[:, None]), axis=-1) + decode_bbox = np.minimum(np.maximum(decode_bbox, 0.0), 1.0) + return decode_bbox + + def detection_out(self, predictions, keep_top_k=200, + confidence_threshold=0.01): + """Do non maximum suppression (nms) on prediction results. + + # Arguments + predictions: Numpy array of predicted values. + num_classes: Number of classes for prediction. + keep_top_k: Number of total bboxes to be kept per image + after nms step. + confidence_threshold: Only consider detections, + whose confidences are larger than a threshold. + + # Return + results: List of predictions for every picture. Each prediction is: + [label, confidence, xmin, ymin, xmax, ymax] + """ + mbox_loc = predictions[:, :, :4] + mbox_conf = predictions[:, :, 4:] + results = [] + for i in range(len(mbox_loc)): + results.append([]) + decode_bbox = self.decode(mbox_loc[i]) + for c in range(self.n_classes): + if c == 0: + continue + c_confs = mbox_conf[i, :, c] + c_confs_m = c_confs > confidence_threshold + if len(c_confs[c_confs_m]) > 0: + boxes_to_process = decode_bbox[c_confs_m] + confs_to_process = c_confs[c_confs_m] + feed_dict = {self.boxes: boxes_to_process, + self.scores: confs_to_process} + idx = self.sess.run(self.nms, feed_dict=feed_dict) + good_boxes = boxes_to_process[idx] + confs = confs_to_process[idx][:, None] + labels = c * np.ones((len(idx), 1)) + c_pred = np.concatenate((labels, confs, good_boxes), + axis=1) + results[-1].extend(c_pred) + if len(results[-1]) > 0: + results[-1] = np.array(results[-1]) + argsort = np.argsort(results[-1][:, 1])[::-1] + results[-1] = results[-1][argsort] + results[-1] = results[-1][:keep_top_k] + return results + + +class VOCAnnotationReader(object): + + def __init__(self, data_path, label_names): + self.path_prefix = data_path + self.label_names = label_names + self.num_classes = len(label_names) + self.data = dict() + self._read_xml() + + def _read_xml(self): + filenames = os.listdir(self.path_prefix) + for filename in filenames: + tree = ElementTree.parse(os.sep.join(( + self.path_prefix, filename + ))) + root = tree.getroot() + bounding_boxes = [] + one_hot_classes = [] + size_tree = root.find('size') + width = float(size_tree.find('width').text) + height = float(size_tree.find('height').text) + for object_tree in root.findall('object'): + for bounding_box in object_tree.iter('bndbox'): + xmin = float(bounding_box.find('xmin').text)/width 
+ ymin = float(bounding_box.find('ymin').text)/height + xmax = float(bounding_box.find('xmax').text)/width + ymax = float(bounding_box.find('ymax').text)/height + bounding_box = [xmin, ymin, xmax, ymax] + bounding_boxes.append(bounding_box) + class_name = object_tree.find('name').text + one_hot_class = self._to_one_hot(class_name) + one_hot_classes.append(one_hot_class) + image_name = root.find('filename').text + bounding_boxes = np.asarray(bounding_boxes) + one_hot_classes = np.asarray(one_hot_classes) + image_data = np.hstack((bounding_boxes, one_hot_classes)) + self.data[image_name] = image_data + + def _to_one_hot(self, name): + one_hot_vector = [0] * self.num_classes + if name in self.label_names: + index = self.label_names.index(name) + one_hot_vector[index] = 1 + else: + print('unknown label: %s' % name) + + return one_hot_vector diff --git a/mahjong_sample_web_app/main.py b/mahjong_sample_web_app/main.py index 6ccf2a9..e631657 100644 --- a/mahjong_sample_web_app/main.py +++ b/mahjong_sample_web_app/main.py @@ -11,6 +11,7 @@ from flask import jsonify, request, render_template, redirect, url_for from . import app from .settings import jihai_numbers, yaku_ja_map, rule +from .detector.detector import detect class ParamError(Exception): @@ -19,28 +20,31 @@ class ParamError(Exception): @app.route("/", methods=["GET"]) def index(): + return "test" return render_template("index.html") @app.route("/upload", methods=["POST"]) def upload(): - image = request.form.get("image") - pies = [ - "2m", - "3m", - "4m", - "4m", - "4m", - "2s", - "3s", - "4s", - "e", - "e", - "e", - "2p", - "3p", - "4p", - ] + img_obj = request.files.get("image") + pies = detect(img_obj) + app.logger.debug("pies: %s", pies) + # pies = [ + # "2m", + # "3m", + # "4m", + # "4m", + # "4m", + # "2s", + # "3s", + # "4s", + # "e", + # "e", + # "e", + # "2p", + # "3p", + # "4p", + # ] pies_str = "|".join(pies) return redirect(url_for("confirm", pies=pies_str)) @@ -64,40 +68,40 @@ def _get_pi_objs(): naki_4 = [] dora_pies = [] dora_objs = [] - for i in range(18): - pi = request.args.get(f"pi-{i}") - if pi: - pies.append(pi) - - agari_pi_req = request.args.get(f"agari-{i}") - if agari_pi_req: - agari_pi = pi - agari_pi_num = str(i) - - naki_num = request.args.get(f"naki-{i}") - if naki_num: - naki_pi_dict.update({f"naki-{i}": naki_num}) - if naki_num == "1": - naki_1.append(pi) - elif naki_num == "2": - naki_2.append(pi) - elif naki_num == "3": - naki_3.append(pi) - elif naki_num == "4": - naki_4.append(pi) - is_dora = request.args.get(f"dora-{i}") - if is_dora == "1": - dora_pies.append(pi) - if len(pies) < 14: - errors.append(f"Less pi obj: {len(pies)}") - pies_obj = pies_to_group(pies) - app.logger.debug(agari_pi) - if not agari_pi: - errors.append(f"No agari pi obj") - else: - agari_obj = str_to_pi_obj(agari_pi) - if agari_obj is None: - errors.append(f"Can't make obj: {agari_obj}") + # for i in range(18): + # pi = request.args.get(f"pi-{i}") + # if pi: + # pies.append(pi) + + # agari_pi_req = request.args.get(f"agari-{i}") + # if agari_pi_req: + # agari_pi = pi + # agari_pi_num = str(i) + + # naki_num = request.args.get(f"naki-{i}") + # if naki_num: + # naki_pi_dict.update({f"naki-{i}": naki_num}) + # if naki_num == "1": + # naki_1.append(pi) + # elif naki_num == "2": + # naki_2.append(pi) + # elif naki_num == "3": + # naki_3.append(pi) + # elif naki_num == "4": + # naki_4.append(pi) + # is_dora = request.args.get(f"dora-{i}") + # if is_dora == "1": + # dora_pies.append(pi) + # if len(pies) < 14: + # errors.append(f"Less pi 
obj: {len(pies)}") + # pies_obj = pies_to_group(pies) + # app.logger.debug(agari_pi) + # if not agari_pi: + # errors.append(f"No agari pi obj") + # else: + # agari_obj = str_to_pi_obj(agari_pi) + # if agari_obj is None: + # errors.append(f"Can't make obj: {agari_obj}") app.logger.debug("Melds: %s, %s, %s, %s", naki_1, naki_2, naki_3, naki_4) melds = _get_meld_pies(naki_1, naki_2, naki_3, naki_4) @@ -152,61 +156,62 @@ def _get_meld_pies(naki_1, naki_2, naki_3, naki_4): def _get_attr_setting(): - round_wind_str = request.args.get("ba") - if round_wind_str == "ton-ba": - round_wind = EAST - elif round_wind_str == "nan-ba": - round_wind = SOUTH - else: - raise ParamError(f"No ba: {round_wind_str}") - player_wind_str = request.args.get("kaze") - if player_wind_str == "ton": - player_wind = EAST - elif player_wind_str == "nan": - player_wind = SOUTH - elif player_wind_str == "sha": - player_wind = WEST - elif player_wind_str == "pei": - player_wind = NORTH - else: - raise ParamError(f"No kaze: {player_wind}") - ron = request.args.get("ron") - tsumo = request.args.get("tsumo") - if tsumo == "1": - is_tsumo = True - is_ron = False - elif ron == "1": - is_tsumo = False - is_ron = True - else: - raise ParamError(f"No Ron or Tsumo") - riichi = request.args.get("riichi") - attr = { - "round_wind": round_wind, - "player_wind": player_wind, - "is_tsumo": is_tsumo, - # "opened": False, # TODO - "is_riichi": riichi, - # "is_ippatsu": False, - # "is_rinshan": False, - # "is_chankan": False, - # "is_haitei": False, - # "is_houtei": False, - # "is_daburu_riichi": False, - # "is_nagashi_mangan": False, - # "is_tenhou": False, - # "is_renhou": False, - # "is_chiihou": False, - } - tiles_attr = { - "is_tsumo": is_tsumo, - "is_ron": is_ron, - "is_riichi": riichi, - "round_wind_str": round_wind_str, - "player_wind_str": player_wind_str, - } - - return attr, tiles_attr + return {}, {} + # round_wind_str = request.args.get("ba") + # if round_wind_str == "ton-ba": + # round_wind = EAST + # elif round_wind_str == "nan-ba": + # round_wind = SOUTH + # else: + # raise ParamError(f"No ba: {round_wind_str}") + # player_wind_str = request.args.get("kaze") + # if player_wind_str == "ton": + # player_wind = EAST + # elif player_wind_str == "nan": + # player_wind = SOUTH + # elif player_wind_str == "sha": + # player_wind = WEST + # elif player_wind_str == "pei": + # player_wind = NORTH + # else: + # raise ParamError(f"No kaze: {player_wind}") + # ron = request.args.get("ron") + # tsumo = request.args.get("tsumo") + # if tsumo == "1": + # is_tsumo = True + # is_ron = False + # elif ron == "1": + # is_tsumo = False + # is_ron = True + # else: + # raise ParamError(f"No Ron or Tsumo") + # riichi = request.args.get("riichi") + # attr = { + # "round_wind": round_wind, + # "player_wind": player_wind, + # "is_tsumo": is_tsumo, + # # "opened": False, # TODO + # "is_riichi": riichi, + # # "is_ippatsu": False, + # # "is_rinshan": False, + # # "is_chankan": False, + # # "is_haitei": False, + # # "is_houtei": False, + # # "is_daburu_riichi": False, + # # "is_nagashi_mangan": False, + # # "is_tenhou": False, + # # "is_renhou": False, + # # "is_chiihou": False, + # } + # tiles_attr = { + # "is_tsumo": is_tsumo, + # "is_ron": is_ron, + # "is_riichi": riichi, + # "round_wind_str": round_wind_str, + # "player_wind_str": player_wind_str, + # } + + # return attr, tiles_attr @app.route("/calc", methods=["GET"]) diff --git a/requirements-detector.txt b/requirements-detector.txt new file mode 100644 index 0000000..4b3d9fa --- /dev/null +++ 
b/requirements-detector.txt @@ -0,0 +1,106 @@ +absl-py==0.6.1 +astor==0.7.1 +autopep8==1.4.4 +backcall==0.1.0 +bleach==3.0.2 +bokeh==1.1.0 +cffi==1.11.5 +chardet==2.3.0 +Click==7.0 +contextlib2==0.5.5 +cycler==0.10.0 +Cython==0.29.7 +decorator==4.3.0 +defusedxml==0.5.0 +entrypoints==0.3 +flake8==3.7.8 +future==0.17.1 +gast==0.2.0 +grpcio==1.16.0 +gym==0.12.1 +h5py==2.8.0 +holoviews==1.12.2 +horovod==0.15.2 +imageio==2.5.0 +ipykernel==5.1.0 +ipython==7.1.1 +ipython-genutils==0.2.0 +ipywidgets==7.4.2 +jedi==0.13.1 +Jinja2==2.10 +jsonschema==2.6.0 +jupyter==1.0.0 +jupyter-client==5.2.3 +jupyter-console==6.0.0 +jupyter-core==4.4.0 +Keras==2.2.4 +Keras-Applications==1.0.6 +Keras-Preprocessing==1.0.5 +kiwisolver==1.0.1 +lxml==4.3.3 +Markdown==3.0.1 +MarkupSafe==1.1.0 +matplotlib==3.0.1 +mccabe==0.6.1 +mistune==0.8.4 +nbconvert==5.4.0 +nbformat==4.4.0 +networkx==2.3 +notebook==5.7.0 +numpy==1.15.4 +packaging==19.0 +pandas==0.23.4 +pandocfilters==1.4.2 +param==1.9.0 +parso==0.3.1 +pexpect==4.6.0 +pickleshare==0.7.5 +Pillow==6.2.0 +prometheus-client==0.4.2 +prompt-toolkit==2.0.7 +protobuf==3.6.1 +ptyprocess==0.6.0 +pycodestyle==2.5.0 +pycparser==2.19 +pycurl==7.43.0 +pydot-ng==2.0.0 +pyflakes==2.1.1 +pyglet==1.3.2 +Pygments==2.2.0 +pygobject==3.20.0 +pyparsing==2.3.0 +python-apt==1.1.0b1+ubuntu0.16.4.2 +python-dateutil==2.7.5 +pytz==2018.7 +pyviz-comms==0.7.2 +PyWavelets==1.0.3 +PyYAML==3.13 +pyzmq==17.1.2 +qtconsole==4.4.2 +requests==2.9.1 +scikit-image==0.15.0 +scikit-learn==0.20.0 +scipy==1.1.0 +seaborn==0.9.0 +Send2Trash==1.5.0 +six==1.11.0 +sklearn==0.0 +ssh-import-id==5.5 +tensorboard==1.12.0 +tensorflow-gpu==1.12.0 +termcolor==1.1.0 +terminado==0.8.1 +testpath==0.4.2 +toml==0.10.0 +torch==1.0.0 +torchvision==0.2.1 +tornado==5.1.1 +tqdm==4.32.1 +traitlets==4.3.2 +urllib3==1.13.1 +wcwidth==0.1.7 +webencodings==0.5.1 +Werkzeug==0.14.1 +widgetsnbextension==3.4.2 + +flask==0.11.1 diff --git a/requirements.txt b/requirements.txt index d1fd4b4..ab94365 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,45 @@ +absl-py==0.8.1 appdirs==1.4.3 +astor==0.8.0 +atomicwrites==1.3.0 attrs==19.2.0 black==19.3b0 Click==7.0 Flask==1.1.1 +gast==0.2.2 +google-pasta==0.1.7 +grpcio==1.24.1 +h5py==2.10.0 +importlib-metadata==0.23 itsdangerous==1.1.0 Jinja2==2.10.1 +Keras==2.3.1 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.0 +mahjong==1.1.9 +Markdown==3.1.1 MarkupSafe==1.1.1 +more-itertools==7.2.0 +numpy==1.17.2 +opt-einsum==3.1.0 +packaging==19.2 pi==0.1.2 +Pillow==6.2.0 +pluggy==0.13.0 +protobuf==3.10.0 +py==1.8.0 +pyparsing==2.4.2 +pytest==5.2.1 +PyYAML==5.1.2 +scipy==1.3.1 +six==1.12.0 +tensorboard==1.14.0 +tensorflow==1.14.0 +tensorflow-estimator==1.14.0 +termcolor==1.1.0 toml==0.10.0 +uWSGI==2.0.18 +wcwidth==0.1.7 Werkzeug==0.16.0 +wrapt==1.11.2 +zipp==0.6.0 diff --git a/setup.py b/setup.py index 85fa6e6..04a3f10 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,12 @@ # "flask-cors", "Jinja2", "mahjong", + "uWSGI", + "pillow", + "numpy", + # "scipy", + "keras", + # "tensorflow", ], author="Manabu TERADA", author_email="terada@cmscom.jp", diff --git a/uwsgi.ini b/uwsgi.ini new file mode 100644 index 0000000..f3299c1 --- /dev/null +++ b/uwsgi.ini @@ -0,0 +1,10 @@ +[uwsgi] +wsgi-file = /code/wsgi.py +callable = app +master = false +processes = 1 +; socket = :3031 +http = :8080 +chmod-socket = 666 +vacuum = true +die-on-term = true \ No newline at end of file diff --git a/wsgi.py b/wsgi.py new file mode 100644 index 0000000..5862f28 --- /dev/null +++ b/wsgi.py @@ -0,0 +1,8 @@ +import os 
+import logging +from mahjong_sample_web_app.run import create_app + + +if os.environ.get("DEBUG"): + logging.basicConfig(level=logging.DEBUG) +app = create_app("local")
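
A minimal sketch of driving the new `SingleShotMultiBoxDetector` end to end, assuming the class lives in `mahjong_sample_web_app/detector/ssd/ssd.py` next to the `utils.py` added above, and that `params.json` / `weights.hdf5` are placeholder paths for a parameter file written by `save_parameters` and trained weights:

```
import numpy as np
from PIL import Image

# assumed module path for the class shown in this diff
from mahjong_sample_web_app.detector.ssd.ssd import SingleShotMultiBoxDetector

ssd = SingleShotMultiBoxDetector(
    overlap_threshold=0.5, nms_threshold=0.45, max_output_size=400
)
ssd.load_parameters("params.json")      # restores class names, input shape, ...
ssd.build(init_weight="weights.hdf5")   # builds the network and loads weights

# one RGB image resized to the network's (square) input resolution
size = tuple(ssd.input_shape[:2])
img = np.array(Image.open("hand.jpg").convert("RGB").resize(size), dtype="float32")
results = ssd.detect(np.array([img]), batch_size=1, do_preprocess=True)

# each row of results[0] is [label, confidence, xmin, ymin, xmax, ymax]
for label, conf, xmin, ymin, xmax, ymax in results[0]:
    if conf >= 0.9:
        print(ssd.class_names[int(label)], conf, (xmin, ymin, xmax, ymax))
```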
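A quick check of the prior-box arithmetic in `make_bboxes`: a two-element `scale` contributes two square boxes (`min` and `sqrt(min * max)`) and each aspect ratio one more, so a 38x38 map with two ratios yields four boxes per cell. The shapes below follow the function's own indexing of `feature_map_shape`; importing `utils` requires TensorFlow 1.x to be installed:

```
from mahjong_sample_web_app.detector.ssd.utils import make_bboxes

priors = make_bboxes(
    input_shape=(300, 300),
    feature_map_shape=(None, 38, 38),  # indexed as [1]=w, [2]=h above
    aspect_ratios=[2.0, 0.5],
    scale=[30.0, 60.0],
)
# 38 * 38 cells * 4 boxes each -> (5776, 4), rows are corner boxes in [0, 1]
print(priors.shape)
```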
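`VOCAnnotationReader` turns a directory of PASCAL-VOC style XML files into exactly the `(n_objects, 4 + n_classes)` arrays that `BoundaryBox.assign_boxes` consumes; the directory and label list here are placeholders:

```
from mahjong_sample_web_app.detector.ssd.utils import VOCAnnotationReader

labels = ["1m", "2m", "3m"]  # tile classes, background excluded
reader = VOCAnnotationReader("annotations/", labels)
for image_name, data in reader.data.items():
    # rows: [xmin, ymin, xmax, ymax, one-hot class ...], normalized to [0, 1]
    print(image_name, data.shape)
```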
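`train_by_generator` only assumes a duck-typed batch source: a `generate(preprocess, train)` method plus `batch_size`, `train_batches` and `val_batches` attributes, where `train_batches / batch_size` becomes the steps per epoch. A stand-in that satisfies the interface, with dummy arrays where a real pipeline would load images and `assign_boxes` targets (the prior and class counts are placeholders and must match the built model):

```
import numpy as np


class TileBatchGenerator:
    """Duck-typed batch source matching train_by_generator's expectations."""

    def __init__(self, batch_size=8, train_batches=80, val_batches=16,
                 n_priors=8732, n_classes=35):
        self.batch_size = batch_size
        self.train_batches = train_batches  # total samples seen per epoch
        self.val_batches = val_batches
        self.n_priors = n_priors
        self.n_classes = n_classes          # including background

    def generate(self, preprocess, train=True):
        while True:
            # real code: load a batch of tile photos and encode the ground
            # truth with BoundaryBox.assign_boxes
            X = np.zeros((self.batch_size, 300, 300, 3), dtype="float32")
            y = np.zeros((self.batch_size, self.n_priors, 4 + self.n_classes),
                         dtype="float32")
            yield preprocess(X), y


# history = ssd.train_by_generator(TileBatchGenerator(), epoch=1,
#                                  checkpoints="weights.{epoch:02d}.hdf5")
```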
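On the web side, `/upload` now expects the photo as a multipart file field named `image` instead of a form string. A sketch of exercising it with Flask's test client, assuming the detector model files are in place so `detect` can run:

```
import io

from mahjong_sample_web_app.run import create_app

app = create_app("local")
client = app.test_client()

with open("hand.jpg", "rb") as f:
    resp = client.post(
        "/upload",
        data={"image": (io.BytesIO(f.read()), "hand.jpg")},
        content_type="multipart/form-data",
    )
# on success the view redirects to /confirm?pies=<tile|codes>
print(resp.status_code, resp.headers.get("Location"))
```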