From 390ccac4236245a1cc79dbc0f8947de73e569156 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Sat, 2 Feb 2019 14:26:17 +0800 Subject: [PATCH 01/40] new: use flask cli for interface --- setup.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 3d56dba..85bacfd 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,6 @@ #!/usr/bin/env python from setuptools import setup, find_packages -""" -TODO -- copy or link `python` folder to `transformation_invariant_image_search` -""" def readme(): with open('README.md') as f: @@ -29,6 +25,9 @@ def readme(): zip_safe=False, python_requires='>=3.6', install_requires=[ + 'Flask-Admin==1.5.3', + 'Flask-SQLAlchemy>=2.3.2', + 'Flask>=1.0.2', 'hiredis', 'numpy', 'redis', @@ -36,9 +35,15 @@ def readme(): 'scipy', 'tqdm>=4.29.1', ], + extras_require={ + 'dev': [ + 'docutils==0.14', + 'pytest==4.2.0', + ], + }, entry_points={ 'console_scripts': [ - 'transformation-invariant-image-search = transformation_invariant_image_search.main:main'] + 'transformation-invariant-image-search = transformation_invariant_image_search.main:cli'] }, classifiers=[ 'Development Status :: 3 - Alpha', From 1c7359141b36a3773514f99067a19cd278c2afe3 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Sat, 2 Feb 2019 14:26:59 +0800 Subject: [PATCH 02/40] new: app models --- .../models.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 transformation_invariant_image_search/models.py diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py new file mode 100644 index 0000000..a30767f --- /dev/null +++ b/transformation_invariant_image_search/models.py @@ -0,0 +1,73 @@ +from flask import Flask +from flask_sqlalchemy import SQLAlchemy + +DB = SQLAlchemy() + +triangle_points = DB.Table( + 'triangle_points', + DB.Column('triangle_phash_id', DB.Integer, DB.ForeignKey('triangle_phash.id'), primary_key=True), + DB.Column('point_id', DB.Integer, DB.ForeignKey('point.id'), primary_key=True)) +triangle_phashes = DB.Table( + 'triangle_phashes', + DB.Column('triangle_phash_id', DB.Integer, DB.ForeignKey('triangle_phash.id'), primary_key=True), + DB.Column('phash_id', DB.Integer, DB.ForeignKey('phash.id'), primary_key=True)) + + +class Base(DB.Model): + __abstract__ = True + id = DB.Column(DB.Integer, primary_key=True) + + +class Checksum(Base): + value = DB.Column(DB.String(), unique=True, nullable=False) + trash = DB.Column(DB.Boolean(), default=False) + ext = DB.Column(DB.String(), nullable=False) + + def __repr__(self): + templ = '' + return templ.format(self) + + +class Point(Base): + x = DB.Column(DB.Integer(), nullable=False) + y = DB.Column(DB.Integer(), nullable=False) + + def __repr__(self): + templ = '' + return templ.format(self) + + +class Phash(Base): + value = DB.Column(DB.String(), unique=True, nullable=False) + + def __repr__(self): + templ = '' + return templ.format(self) + + +class TrianglePhash(Base): + checksum_id = DB.Column() + checksum = DB.Column() + points = DB.relationship('Point', secondary=triangle_points, lazy='subquery', + backref=DB.backref('triangle_phashes', lazy=True)) + phashes = DB.relationship('Phash', secondary=triangle_phashes, lazy='subquery', + backref=DB.backref('triangle_phashes', lazy=True)) + + def __repr__(self): + templ = '' + return templ.format( + self, + ','.join(['({0.x, 0.y})'.format(x) for x in Point]), + ','.join(['{0.value}'.format(x) for x in Point]), + ) + + +def get_or_create(session, model, **kwargs): + """Creates an object or returns the object if exists.""" + instance = session.query(model).filter_by(**kwargs).first() + created = False + if not instance: + instance = model(**kwargs) + session.add(instance) + created = True + return instance, created From 58e5eaf1c83c8b398c059c8ae1c729f0ee4ddc0e Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Sat, 2 Feb 2019 14:27:22 +0800 Subject: [PATCH 03/40] new: flask cli interface --- transformation_invariant_image_search/main.py | 95 +++++++++++++++++-- 1 file changed, 85 insertions(+), 10 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 84d2097..67008fb 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -2,17 +2,27 @@ Usage: main.py lookup ... main.py insert ... """ -import sys -import multiprocessing from collections import Counter from os import cpu_count +import multiprocessing +import sys +import os +from flask import Flask +from flask.cli import FlaskGroup +from flask_admin import Admin, AdminIndexView +import click import cv2 -import redis import numpy as np +import redis from .keypoints import compute_keypoints from .phash import triangles_from_keypoints, hash_triangles +from .models import DB + + +__version__ = '0.0.1' +DEFAULT_DB_URI = None def phash_triangles(img, triangles, batch_size=None): @@ -68,12 +78,77 @@ def lookup(chunks, filename): print(f'{num:<10d} {key.decode("utf-8")}') -def main(): - if len(sys.argv) < 3: - print(__doc__) - exit(1) - - command, *filenames = sys.argv[1:] +def create_app(script_info=None, db_uri=DEFAULT_DB_URI): + """create app.""" + app = Flask(__name__) + app.config['SQLALCHEMY_DATABASE_URI'] = db_uri # NOQA + app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + app.config['SECRET_KEY'] = os.getenv('TIIS_SECRET_KEY') or os.urandom(24) + app.config['WTF_CSRF_ENABLED'] = False + # app and db + # DB.init_app(app) + # app.app_context().push() + # db.create_all() + + @app.shell_context_processor + def shell_context(): + return {'app': app, 'db': DB, 'models': models, 'session': DB.session} + + # Migrate(app, DB) + # flask-admin + app_admin = Admin( + app, name='Transformation Image Search', template_mode='bootstrap3', + index_view=AdminIndexView( + # name='Home', + # template='admin/myhome.html', + url='/' + ) + ) + # index_view=views.HomeView(name='Home', template='transformation_invariant_image_search/index.html', url='/')) # NOQA + return app + + +def get_custom_version(ctx, param, value): + # if not value or ctx.resilient_parsing: + # return + message = '{app_name} {app_version}\nFlask {version}\nPython {python_version}' + click.echo(message.format(**{ + 'app_name': 'Transformation Invariant Image Search', + 'app_version': __version__, + 'version': flask_version, + 'python_version': sys.version, + }), color=ctx.color) + ctx.exit() + + +class CustomFlaskGroup(FlaskGroup): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.params[0].help = 'Show the program version' + self.params[0].callback = get_custom_version + + +@click.group(cls=CustomFlaskGroup, create_app=create_app) +def cli(): + """CLI interface for Transformation Invariant Image Search.""" + pass + + +@cli.command() +@click.argument('image', nargs=-1) +def insert(image): + """Insert image's triangle phashes to database.""" + main('insert', image) + + +@cli.command() +@click.argument('image', nargs=-1) +def lookup(image): + """Lookup image's triangle phashes in database.""" + main('lookup', image) + + +def main(command, filenames): command = insert if command == 'insert' else lookup r = redis.StrictRedis(host='localhost', port=6379, db=0) @@ -97,4 +172,4 @@ def main(): if __name__ == '__main__': - main() + cli() From f138310ed1b28161158397e7fdb21365c18aa041 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Sat, 2 Feb 2019 14:27:34 +0800 Subject: [PATCH 04/40] new: simple main test --- tests/test_main.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/test_main.py diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..2c3daf6 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,47 @@ +import os +import tempfile + +from click.testing import CliRunner +import click +import pytest + +from transformation_invariant_image_search import main + + + +@pytest.fixture +def client(): + app = main.create_app() + db_fd, app.config['DATABASE'] = tempfile.mkstemp() + app.config['TESTING'] = True + client = app.test_client() + + with app.app_context(): + # flaskr.init_db() + pass + + yield client + + os.close(db_fd) + os.unlink(app.config['DATABASE']) + + +def test_empty_db(client): + """Start with a blank database.""" + + rv = client.get('/') + assert b'Home - Transformation Image Search' in rv.data + + +def test_help(): + runner = CliRunner() + result = runner.invoke(main.cli, ['--help']) + assert result.exit_code == 0 + assert 'Usage:' in result.output + + +def test_version(): + runner = CliRunner() + result = runner.invoke(main.cli, ['--version']) + assert result.exit_code == 0 + # assert '' in result.output From b38c7d95d3756c4e443d7baa80ba0e0077633750 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 07:52:33 +0800 Subject: [PATCH 05/40] new: merge cli test --- tests/test_main.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 2c3daf6..2d5892b 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -8,7 +8,6 @@ from transformation_invariant_image_search import main - @pytest.fixture def client(): app = main.create_app() @@ -33,15 +32,13 @@ def test_empty_db(client): assert b'Home - Transformation Image Search' in rv.data -def test_help(): - runner = CliRunner() - result = runner.invoke(main.cli, ['--help']) - assert result.exit_code == 0 - assert 'Usage:' in result.output - - -def test_version(): +@pytest.mark.parametrize( + 'args,word', + [('--help', 'Usage:'), ('--version', None)] +) +def test_cli(args, word): runner = CliRunner() - result = runner.invoke(main.cli, ['--version']) + result = runner.invoke(main.cli, [args]) assert result.exit_code == 0 - # assert '' in result.output + if word is not None: + assert word in result.output From 9481407c68876d8761b0676ab32c0063f3f1ab80 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 07:59:27 +0800 Subject: [PATCH 06/40] new: docs: permission error fix --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 68cd5d8..294dc11 100644 --- a/README.md +++ b/README.md @@ -64,11 +64,11 @@ sudo apt-get install python3-pip python3-opencv redis-server -y # read more here: https://github.com/pypa/pip/issues/3813 PATH="$PATH:~/.local/bin" -#cd to project directory +# cd to project directory pip3 install . ``` -You also need install redis. +if you got permission error, install it under virtual env or use `--user` flag. # Demo 1 From e9dc512beb2cdcba1d293ed265d0a918074a933d Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 08:30:58 +0800 Subject: [PATCH 07/40] fix: version output result --- tests/test_main.py | 5 ++- transformation_invariant_image_search/main.py | 32 +++++++++++++------ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 2d5892b..f5268db 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -34,7 +34,10 @@ def test_empty_db(client): @pytest.mark.parametrize( 'args,word', - [('--help', 'Usage:'), ('--version', None)] + [ + ('--help', 'Usage:'), + ('--version', 'Transformation Invariant Image Search') + ] ) def test_cli(args, word): runner = CliRunner() diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 67008fb..00df7d1 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -5,14 +5,16 @@ from collections import Counter from os import cpu_count import multiprocessing -import sys import os +import platform +import sys from flask import Flask from flask.cli import FlaskGroup from flask_admin import Admin, AdminIndexView import click import cv2 +import flask import numpy as np import redis @@ -109,22 +111,34 @@ def shell_context(): def get_custom_version(ctx, param, value): - # if not value or ctx.resilient_parsing: - # return - message = '{app_name} {app_version}\nFlask {version}\nPython {python_version}' - click.echo(message.format(**{ + """Output modified --version flag result. + + Modified from: + https://github.com/pallets/flask/blob/master/flask/cli.py + """ + if not value or ctx.resilient_parsing: + return + import werkzeug + message = ( + '%(app_name)s %(app_version)s\n' + 'Python %(python)s\n' + 'Flask %(flask)s\n' + 'Werkzeug %(werkzeug)s' + ) + click.echo(message % { 'app_name': 'Transformation Invariant Image Search', 'app_version': __version__, - 'version': flask_version, - 'python_version': sys.version, - }), color=ctx.color) + 'python': platform.python_version(), + 'flask': flask.__version__, + 'werkzeug': werkzeug.__version__, + }, color=ctx.color) ctx.exit() class CustomFlaskGroup(FlaskGroup): def __init__(self, **kwargs): super().__init__(**kwargs) - self.params[0].help = 'Show the program version' + self.params[0].help = 'Show the program version.' self.params[0].callback = get_custom_version From 564a4b458553c2458193165dff7e47dee7e6e170 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 09:00:50 +0800 Subject: [PATCH 08/40] new: docs: main func --- transformation_invariant_image_search/keypoints.py | 10 ++++++++++ transformation_invariant_image_search/main.py | 12 ++++++++++++ transformation_invariant_image_search/phash.py | 12 ++++++++++++ 3 files changed, 34 insertions(+) diff --git a/transformation_invariant_image_search/keypoints.py b/transformation_invariant_image_search/keypoints.py index c46716a..6407b7e 100644 --- a/transformation_invariant_image_search/keypoints.py +++ b/transformation_invariant_image_search/keypoints.py @@ -56,6 +56,16 @@ def recolour(img, gauss_width=41): def compute_keypoints(img): + """Compute keypoints. + + >>> filename = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> img = cv2.imread(filename) + >>> res = compute_keypoints(img) + >>> len(res) == 50 + True + >>> sorted(res)[0] + (1.0, 26.0) + """ gauss_width = 21 img = recolour(img, gauss_width) b, _, _ = cv2.split(img) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 00df7d1..f9a2403 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -28,6 +28,18 @@ def phash_triangles(img, triangles, batch_size=None): + """Get phash from triangles. + + >>> filename = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> img = cv2.imread(filename) + >>> keypoints = compute_keypoints(img) + >>> triangles = triangles_from_keypoints(keypoints) + >>> res = phash_triangles(img, triangles) + >>> len(res) + 34770 + >>> sorted(res)[0] + '0000563b8d730d07' + """ n = len(triangles) if batch_size is None: diff --git a/transformation_invariant_image_search/phash.py b/transformation_invariant_image_search/phash.py index fd63bb6..957a08d 100644 --- a/transformation_invariant_image_search/phash.py +++ b/transformation_invariant_image_search/phash.py @@ -90,6 +90,18 @@ def hash_triangles(img, triangles): def triangles_from_keypoints(keypoints, lower=50, upper=400): + """Get Triangles from keypoints. + + >>> from .keypoints import compute_keypoints + >>> filename = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> img = cv2.imread(filename) + >>> keypoints = compute_keypoints(img) + >>> res = triangles_from_keypoints(keypoints) + >>> len(res) + 11590 + >>> res[0] + (array([162., 203.]), array([261., 76.]), array([131., 63.])) + """ keypoints = np.asarray(keypoints, dtype=float) tree = BallTree(keypoints, leaf_size=10) From 8027c71413674708413ce34601324eef3ab1da4f Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 09:24:54 +0800 Subject: [PATCH 09/40] new: test: travis chg: test: remove travis test for python 3.7 fix: test: travis pytest ImportMismatchError fix: test: remove pyc chg: test: install script on travis --- .travis.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..980524a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,9 @@ +language: python +python: + - "3.6" +# command to install dependencies +install: + - python setup.py develop +# command to run tests +script: + - pytest --doctest-module From 7f2408dcf1eee3e60e72fb24428290cee9b44df1 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 09:35:31 +0800 Subject: [PATCH 10/40] fix: docs: readme syntax --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 294dc11..0646ebf 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Instead of running these commands manually you can run the ./setup.sh script whi Or if you want to run the commands manually... -``` +```console # From the root of the repo go to ./fullEndToEndDemo cd ./fullEndToEndDemo @@ -56,7 +56,7 @@ This setup was tested on a newly deployed vm on Ubuntu 18.04 LTS, YMMV on differ To use python package, do the following: -``` +```console sudo apt-get update sudo apt-get install python3-pip python3-opencv redis-server -y @@ -92,7 +92,7 @@ The demo takes 2 minutes (1 minute 38 seconds*) to run on a quad core VM *Thanks to [meowcoder](https://github.com/meowcoder) for the speed up! -``` +```console user@instance-1:~/transformationInvariantImageSearch/fullEndToEndDemo$ time ./runDemo1.sh Loading image: inputImages/cat1.png ... done Added 46725 image fragments to DB @@ -133,8 +133,8 @@ sys 0m6.592s python example ```console -$ time transformation-invariant-image-search insert fullEndToEndDemo/inputImages/cat* && \ - time transformation-invariant-image-search lookup fullEndToEndDemo/inputImages/cat_original.png +$ time transformation-invariant-image-search insert fullEndToEndDemo/inputImages/cat* && \ +$ time transformation-invariant-image-search lookup fullEndToEndDemo/inputImages/cat_original.png loading fullEndToEndDemo/inputImages/cat1.png 100%|██| 3/3 [00:07<00:00, 2.66s/it] @@ -217,7 +217,7 @@ Here the two images mona.jpg and van_gogh.jpg are inserted into the database and *Thanks to [meowcoder](https://github.com/meowcoder) for the speed up! -``` +```console user@instance-1:~/transformationInvariantImageSearch/fullEndToEndDemo$ time ./runDemo2.sh Loading image: ./inputImages/mona.jpg ... done Added 26991 image fragments to DB @@ -238,7 +238,7 @@ python example ```console $ time transformation-invariant-image-search insert ./fullEndToEndDemo/inputImages/mona.jpg ./fullEndToEndDemo/inputImages/van_gogh.jpg && \ - time transformation-invariant-image-search lookup ./fullEndToEndDemo/inputImages/monaComposite.jpg +$ time transformation-invariant-image-search lookup ./fullEndToEndDemo/inputImages/monaComposite.jpg loading ./fullEndToEndDemo/inputImages/mona.jpg 100%|███| 3/3 [00:03<00:00, 1.24s/it] From 707114a93b4119a90bfa7d27782dbf9aa2789ace Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 09:37:33 +0800 Subject: [PATCH 11/40] new: dev: add opencv package --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 85bacfd..0153cde 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ def readme(): 'Flask>=1.0.2', 'hiredis', 'numpy', + 'opencv-python>=4.0.0.21', 'redis', 'scikit-learn', 'scipy', From e868d2ceff82a5cd4ea3f54382767133052430d4 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 09:51:16 +0800 Subject: [PATCH 12/40] chg: docs: syntax highlight code --- README.md | 59 +++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 0646ebf..fac9984 100644 --- a/README.md +++ b/README.md @@ -28,24 +28,22 @@ Instead of running these commands manually you can run the ./setup.sh script whi Or if you want to run the commands manually... ```console -# From the root of the repo go to ./fullEndToEndDemo -cd ./fullEndToEndDemo - -# Grab all the dependencies, this install is pretty huge -sudo apt-get update -sudo apt-get install git cmake g++ redis-server libboost-all-dev libopencv-dev python-opencv python-numpy python-scipy -y - -#Make it -cmake . -make - -# This step is optional. It removes a pointless annoying error opencv spits out -# About: https://stackoverflow.com/questions/12689304/ctypes-error-libdc1394-error-failed-to-initialize-libdc1394 -sudo ln /dev/null /dev/raw1394 - -# Then run either ./runDemo1.sh or ./runDemo2.sh to run the demo - - +$ # From the root of the repo go to ./fullEndToEndDemo +$ cd ./fullEndToEndDemo +$ +$ # Grab all the dependencies, this install is pretty huge +$ sudo apt-get update +$ sudo apt-get install git cmake g++ redis-server libboost-all-dev libopencv-dev python-opencv python-numpy python-scipy -y +$ +$ #Make it +$ cmake . +$ make +$ +$ # This step is optional. It removes a pointless annoying error opencv spits out +$ # About: https://stackoverflow.com/questions/12689304/ctypes-error-libdc1394-error-failed-to-initialize-libdc1394 +$ sudo ln /dev/null /dev/raw1394 +$ +$ # Then run either ./runDemo1.sh or ./runDemo2.sh to run the demo ``` # Python setup @@ -57,15 +55,15 @@ This setup was tested on a newly deployed vm on Ubuntu 18.04 LTS, YMMV on differ To use python package, do the following: ```console -sudo apt-get update -sudo apt-get install python3-pip python3-opencv redis-server -y - -# On some systems this path is missing -# read more here: https://github.com/pypa/pip/issues/3813 -PATH="$PATH:~/.local/bin" - -# cd to project directory -pip3 install . +$ sudo apt-get update +$ sudo apt-get install python3-pip python3-opencv redis-server -y +$ +$ # On some systems this path is missing +$ # read more here: https://github.com/pypa/pip/issues/3813 +$ PATH="$PATH:~/.local/bin" +$ +$ # cd to project directory +$ pip3 install . ``` if you got permission error, install it under virtual env or use `--user` flag. @@ -93,7 +91,7 @@ The demo takes 2 minutes (1 minute 38 seconds*) to run on a quad core VM *Thanks to [meowcoder](https://github.com/meowcoder) for the speed up! ```console -user@instance-1:~/transformationInvariantImageSearch/fullEndToEndDemo$ time ./runDemo1.sh +$ time ./fullEndToEndDemo/runDemo1.sh Loading image: inputImages/cat1.png ... done Added 46725 image fragments to DB Loading image: inputImages/cat2.png ... done @@ -218,7 +216,7 @@ Here the two images mona.jpg and van_gogh.jpg are inserted into the database and *Thanks to [meowcoder](https://github.com/meowcoder) for the speed up! ```console -user@instance-1:~/transformationInvariantImageSearch/fullEndToEndDemo$ time ./runDemo2.sh +$ time ./fullEndToEndDemo/runDemo2.sh Loading image: ./inputImages/mona.jpg ... done Added 26991 image fragments to DB Loading image: ./inputImages/van_gogh.jpg ... done @@ -237,7 +235,8 @@ sys 0m18.224s python example ```console -$ time transformation-invariant-image-search insert ./fullEndToEndDemo/inputImages/mona.jpg ./fullEndToEndDemo/inputImages/van_gogh.jpg && \ +$ time transformation-invariant-image-search insert \ +$ ./fullEndToEndDemo/inputImages/mona.jpg ./fullEndToEndDemo/inputImages/van_gogh.jpg && \ $ time transformation-invariant-image-search lookup ./fullEndToEndDemo/inputImages/monaComposite.jpg loading ./fullEndToEndDemo/inputImages/mona.jpg From 1700721f71a44372706013b15a27ae5dcba81d55 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 10:38:38 +0800 Subject: [PATCH 13/40] fix: docs: triangles_from_keypoints --- transformation_invariant_image_search/phash.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transformation_invariant_image_search/phash.py b/transformation_invariant_image_search/phash.py index 957a08d..d62681b 100644 --- a/transformation_invariant_image_search/phash.py +++ b/transformation_invariant_image_search/phash.py @@ -99,8 +99,8 @@ def triangles_from_keypoints(keypoints, lower=50, upper=400): >>> res = triangles_from_keypoints(keypoints) >>> len(res) 11590 - >>> res[0] - (array([162., 203.]), array([261., 76.]), array([131., 63.])) + >>> print(list(map(lambda x: x.tolist(), res[0]))) + [[162.0, 203.0], [261.0, 76.0], [131.0, 63.0]] """ keypoints = np.asarray(keypoints, dtype=float) From 39f1ac0ac5e4b7efb55d60ad5ab9879679c047d9 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 11:06:32 +0800 Subject: [PATCH 14/40] new: test: checksum model --- tests/test_models.py | 13 +++++++++++++ transformation_invariant_image_search/main.py | 2 +- transformation_invariant_image_search/models.py | 8 ++++---- 3 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 tests/test_models.py diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..2125daf --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,13 @@ +def test_checksum(): + from transformation_invariant_image_search import models, main + app = main.create_app(db_uri='sqlite://') + csm_value = '54abb6e1eb59cccf61ae356aff7e491894c5ca606dfda4240d86743424c65faf' + with app.app_context(): + models.DB.create_all() + m = models.Checksum(value=csm_value, ext='png') + models.DB.session.add(m) + models.DB.session.commit() + assert m.id == 1 + + res = models.DB.session.query(models.Checksum).filter_by(id=1).first() + assert res.value == csm_value diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index f9a2403..c22465b 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -99,8 +99,8 @@ def create_app(script_info=None, db_uri=DEFAULT_DB_URI): app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['SECRET_KEY'] = os.getenv('TIIS_SECRET_KEY') or os.urandom(24) app.config['WTF_CSRF_ENABLED'] = False + DB.init_app(app) # app and db - # DB.init_app(app) # app.app_context().push() # db.create_all() diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index a30767f..9ea429e 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -24,8 +24,8 @@ class Checksum(Base): ext = DB.Column(DB.String(), nullable=False) def __repr__(self): - templ = '' - return templ.format(self) + templ = '' + return templ.format(self, self.value[:7]) class Point(Base): @@ -46,8 +46,8 @@ def __repr__(self): class TrianglePhash(Base): - checksum_id = DB.Column() - checksum = DB.Column() + checksum_id = DB.Column(DB.Integer, DB.ForeignKey('checksum.id'), nullable=False) + checksum = DB.relationship('Checksum', backref='triangle_phashes', lazy=True) points = DB.relationship('Point', secondary=triangle_points, lazy='subquery', backref=DB.backref('triangle_phashes', lazy=True)) phashes = DB.relationship('Phash', secondary=triangle_phashes, lazy='subquery', From ea87cccbee62e38a09689f0bcdcca7d98283e574 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 12:23:21 +0800 Subject: [PATCH 15/40] new: test: checksum list --- tests/test_main.py | 30 +++++++++++---- transformation_invariant_image_search/main.py | 37 ++++++++++++++++--- .../models.py | 4 ++ 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index f5268db..95d7043 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,3 +1,4 @@ +import json import os import tempfile @@ -10,15 +11,13 @@ @pytest.fixture def client(): - app = main.create_app() - db_fd, app.config['DATABASE'] = tempfile.mkstemp() + db_fd, config_db = tempfile.mkstemp() + db_uri = 'sqlite:///{}'.format(config_db) + app = main.create_app(db_uri=db_uri) + app.config['DATABASE'] = config_db app.config['TESTING'] = True client = app.test_client() - with app.app_context(): - # flaskr.init_db() - pass - yield client os.close(db_fd) @@ -27,11 +26,28 @@ def client(): def test_empty_db(client): """Start with a blank database.""" - rv = client.get('/') assert b'Home - Transformation Image Search' in rv.data +def test_checksum_get(client): + """test checksum with a blank database.""" + url = '/api/checksum' + rv = client.get(url) + assert rv.get_json() == {} + + +def test_checksum_post(client): + """Start with a blank database.""" + csm_value = '54abb6e1eb59cccf61ae356aff7e491894c5ca606dfda4240d86743424c65faf' + url = '/api/checksum' + exp_dict = dict(value=csm_value, id=1, ext='png', trash=False) + rv = client.post(url, data=dict(value=csm_value, ext='png')) + assert rv.get_json() == exp_dict + rv = client.get(url) + assert rv.get_json() == [exp_dict] + + @pytest.mark.parametrize( 'args,word', [ diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index c22465b..fd29fa6 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -9,9 +9,11 @@ import platform import sys -from flask import Flask +from flask import Flask, jsonify, request from flask.cli import FlaskGroup from flask_admin import Admin, AdminIndexView +from flask_sqlalchemy import SQLAlchemy +from sqlalchemy_utils import database_exists, create_database import click import cv2 import flask @@ -20,7 +22,8 @@ from .keypoints import compute_keypoints from .phash import triangles_from_keypoints, hash_triangles -from .models import DB +from .models import DB, Checksum +from . import models __version__ = '0.0.1' @@ -100,9 +103,10 @@ def create_app(script_info=None, db_uri=DEFAULT_DB_URI): app.config['SECRET_KEY'] = os.getenv('TIIS_SECRET_KEY') or os.urandom(24) app.config['WTF_CSRF_ENABLED'] = False DB.init_app(app) - # app and db - # app.app_context().push() - # db.create_all() + if not database_exists(db_uri): + create_database(db_uri) + with app.app_context(): + DB.create_all() @app.shell_context_processor def shell_context(): @@ -119,9 +123,32 @@ def shell_context(): ) ) # index_view=views.HomeView(name='Home', template='transformation_invariant_image_search/index.html', url='/')) # NOQA + app.add_url_rule('/api/checksum', 'checksum_list', checksum_list, methods=['GET', 'POST']) return app +def checksum_list(): + if request.method == 'POST': + csm_value = request.form.get('value', None) + if not csm_value: + return jsonify({}) + m = DB.session.query(Checksum).filter_by(value=csm_value).first() + if m is None: + kwargs = dict(value=csm_value) + ext = request.form.get('ext', None) + if ext is not None: + kwargs['ext'] = ext + trash = request.form.get('trash', None) + if trash is not None: + kwargs['trash'] = trash + m = Checksum(**kwargs) + DB.session.add(m) + DB.session.commit() + return jsonify(m.to_dict()) + ms = DB.session.query(Checksum).paginate(1, 10).items + return jsonify([x.to_dict() for x in ms]) + + def get_custom_version(ctx, param, value): """Output modified --version flag result. diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index 9ea429e..5080408 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -27,6 +27,10 @@ def __repr__(self): templ = '' return templ.format(self, self.value[:7]) + def to_dict(self): + keys = ['value', 'trash', 'ext', 'id'] + return {k: getattr(self, k) for k in keys} + class Point(Base): x = DB.Column(DB.Integer(), nullable=False) From 1d9ed8006be4b19d6429f8484fef45fa02750819 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 12:23:33 +0800 Subject: [PATCH 16/40] new: dev: add sqlalchemy_utils --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 0153cde..1079eed 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ def readme(): 'redis', 'scikit-learn', 'scipy', + 'SQLAlchemy-Utils>=0.33.11', 'tqdm>=4.29.1', ], extras_require={ From 263294d7d9a8732407ddcef66a8394d27a403827 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 13:25:38 +0800 Subject: [PATCH 17/40] new: dev: image api --- tests/test_main.py | 24 +++++++++- transformation_invariant_image_search/main.py | 48 ++++++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 95d7043..f1f1fe0 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,8 +1,10 @@ import json import os +import shutil import tempfile from click.testing import CliRunner +from flask import current_app import click import pytest @@ -12,8 +14,9 @@ @pytest.fixture def client(): db_fd, config_db = tempfile.mkstemp() + image_fd = tempfile.mkdtemp() db_uri = 'sqlite:///{}'.format(config_db) - app = main.create_app(db_uri=db_uri) + app = main.create_app(db_uri=db_uri, image_dir=image_fd) app.config['DATABASE'] = config_db app.config['TESTING'] = True client = app.test_client() @@ -22,6 +25,7 @@ def client(): os.close(db_fd) os.unlink(app.config['DATABASE']) + shutil.rmtree(image_fd) def test_empty_db(client): @@ -48,6 +52,24 @@ def test_checksum_post(client): assert rv.get_json() == [exp_dict] +def test_image_post(client): + url = '/api/image' + filename = 'fullEndToEndDemo/inputImages/cat_original.png' + csm_value = '54abb6e1eb59cccf61ae356aff7e491894c5ca606dfda4240d86743424c65faf' + ext = 'png' + exp_dict = dict(id=1, value=csm_value, ext=ext, trash=False) + rv = client.post(url) + assert rv.get_json()['error'] + file_data = {'file': open(filename, 'rb')} + rv = client.post(url, data=file_data) + assert rv.get_json() == exp_dict + image_dir = client.application.config.get('IMAGE_DIR') + exp_dst_file = os.path.join(image_dir, csm_value[:2], '{}.{}'.format(csm_value, ext)) + assert os.path.isfile(exp_dst_file) + rv = client.get(url) + assert rv.get_json() == [exp_dict] + + @pytest.mark.parametrize( 'args,word', [ diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index fd29fa6..4e12cbf 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -4,15 +4,20 @@ """ from collections import Counter from os import cpu_count +import hashlib import multiprocessing import os import platform +import shutil import sys +import tempfile +import pathlib -from flask import Flask, jsonify, request +from flask import current_app, Flask, jsonify, request from flask.cli import FlaskGroup from flask_admin import Admin, AdminIndexView from flask_sqlalchemy import SQLAlchemy +from PIL import Image from sqlalchemy_utils import database_exists, create_database import click import cv2 @@ -28,6 +33,7 @@ __version__ = '0.0.1' DEFAULT_DB_URI = None +DEFAULT_IMAGE_DIR = None def phash_triangles(img, triangles, batch_size=None): @@ -95,13 +101,14 @@ def lookup(chunks, filename): print(f'{num:<10d} {key.decode("utf-8")}') -def create_app(script_info=None, db_uri=DEFAULT_DB_URI): +def create_app(script_info=None, db_uri=DEFAULT_DB_URI, image_dir=DEFAULT_IMAGE_DIR): """create app.""" app = Flask(__name__) app.config['SQLALCHEMY_DATABASE_URI'] = db_uri # NOQA app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['SECRET_KEY'] = os.getenv('TIIS_SECRET_KEY') or os.urandom(24) app.config['WTF_CSRF_ENABLED'] = False + app.config['IMAGE_DIR'] = image_dir DB.init_app(app) if not database_exists(db_uri): create_database(db_uri) @@ -124,6 +131,7 @@ def shell_context(): ) # index_view=views.HomeView(name='Home', template='transformation_invariant_image_search/index.html', url='/')) # NOQA app.add_url_rule('/api/checksum', 'checksum_list', checksum_list, methods=['GET', 'POST']) + app.add_url_rule('/api/image', 'image_list', image_list, methods=['GET', 'POST']) return app @@ -149,6 +157,42 @@ def checksum_list(): return jsonify([x.to_dict() for x in ms]) +def image_list(): + if request.method == 'POST': + # check if the post request has the file part + if 'file' not in request.files: + return jsonify({'error': 'No file part'}) + file_ = request.files['file'] + # if user does not select file, browser also + # submit an empty part without filename + if file_.filename == '': + return jsonify({'error': 'No selected file'}) + with tempfile.NamedTemporaryFile(delete=False) as f: + file_.save(f.name) + pil_img = Image.open(f.name) + sha256 = hashlib.sha256() + with open(f.name, 'rb') as f: + for block in iter(lambda: f.read(128*1024), b''): + sha256.update(block) + sha256_csum = sha256.hexdigest() + image_dir = current_app.config.get('IMAGE_DIR', None) + if image_dir is None: + return jsonify({'error': 'Image dir is not specified'}) + ext = pil_img.format.lower() + dst_file = os.path.join( + image_dir, sha256_csum[:2], '{}.{}'.format(sha256_csum, ext)) + m = models.get_or_create(DB.session, Checksum, value=sha256_csum)[0] + m.ext = ext + m.trash = False + pathlib.Path(os.path.dirname(dst_file)).mkdir(parents=True, exist_ok=True) + shutil.move(f.name, dst_file) + DB.session.add(m) + DB.session.commit() + return jsonify(m.to_dict()) + ms = DB.session.query(Checksum).filter_by(trash=False).paginate(1, 10).items + return jsonify([x.to_dict() for x in ms]) + + def get_custom_version(ctx, param, value): """Output modified --version flag result. From 7687688455399b6a52bf490d5af64d0f870951d7 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 13:32:47 +0800 Subject: [PATCH 18/40] new: dev: add pillow --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1079eed..135d092 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ def readme(): 'hiredis', 'numpy', 'opencv-python>=4.0.0.21', + 'Pillow>=5.4.1', 'redis', 'scikit-learn', 'scipy', From 787f357e7fe3cbc06ad7bbc89b381314d394b98f Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 13:35:36 +0800 Subject: [PATCH 19/40] new: dev: add appdirs --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 135d092..2330d6d 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ def readme(): zip_safe=False, python_requires='>=3.6', install_requires=[ + 'appdirs>=1.4.3', 'Flask-Admin==1.5.3', 'Flask-SQLAlchemy>=2.3.2', 'Flask>=1.0.2', From 9cffc844224314b9ac8f273f66f4f5c7c6298a1b Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 13:41:44 +0800 Subject: [PATCH 20/40] fix: dev: checksum get api exp res --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index f1f1fe0..b415339 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -38,7 +38,7 @@ def test_checksum_get(client): """test checksum with a blank database.""" url = '/api/checksum' rv = client.get(url) - assert rv.get_json() == {} + assert rv.get_json() == [] def test_checksum_post(client): From da538adc89842f61ca5c87e4c06d961a425a7f00 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 13:42:04 +0800 Subject: [PATCH 21/40] new: dev: data dir --- transformation_invariant_image_search/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 4e12cbf..92391b5 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -13,6 +13,7 @@ import tempfile import pathlib +from appdirs import user_data_dir from flask import current_app, Flask, jsonify, request from flask.cli import FlaskGroup from flask_admin import Admin, AdminIndexView @@ -32,8 +33,10 @@ __version__ = '0.0.1' -DEFAULT_DB_URI = None -DEFAULT_IMAGE_DIR = None +DATA_DIR = user_data_dir('transformation_invariant_image_search', 'Tom Murphy') +pathlib.Path(DATA_DIR).mkdir(parents=True, exist_ok=True) +DEFAULT_DB_URI = 'sqlite:///{}'.format(os.path.join(DATA_DIR, 'tiis.db')) +DEFAULT_IMAGE_DIR = os.path.join(DATA_DIR, 'image') def phash_triangles(img, triangles, batch_size=None): From 0b9dacdb6f3e44ee087920bb27249132f10e015e Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 13:52:14 +0800 Subject: [PATCH 22/40] new: dev: home page --- tests/test_main.py | 2 +- transformation_invariant_image_search/main.py | 6 +++--- .../templates/tiis/index.html | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 transformation_invariant_image_search/templates/tiis/index.html diff --git a/tests/test_main.py b/tests/test_main.py index b415339..e28ae90 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -31,7 +31,7 @@ def client(): def test_empty_db(client): """Start with a blank database.""" rv = client.get('/') - assert b'Home - Transformation Image Search' in rv.data + assert b'Home - Transformation Invariant Image Search' in rv.data def test_checksum_get(client): diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 92391b5..13c927a 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -125,10 +125,10 @@ def shell_context(): # Migrate(app, DB) # flask-admin app_admin = Admin( - app, name='Transformation Image Search', template_mode='bootstrap3', + app, name='Transformation Invariant Image Search', template_mode='bootstrap3', index_view=AdminIndexView( - # name='Home', - # template='admin/myhome.html', + name='Home', + template='tiis/index.html', url='/' ) ) diff --git a/transformation_invariant_image_search/templates/tiis/index.html b/transformation_invariant_image_search/templates/tiis/index.html new file mode 100644 index 0000000..ac160d1 --- /dev/null +++ b/transformation_invariant_image_search/templates/tiis/index.html @@ -0,0 +1,5 @@ +{% extends 'admin/master.html' %} + +{% block body %} +hello world +{% endblock %} From 288aabf48caec5ac5eed006ea7d7395e278d1eb1 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 15:32:32 +0800 Subject: [PATCH 23/40] new: dev: upload file api --- tests/test_main.py | 4 +- transformation_invariant_image_search/main.py | 22 ++++++++++- .../templates/tiis/index.html | 38 ++++++++++++++++++- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index e28ae90..2898b1b 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -62,7 +62,9 @@ def test_image_post(client): assert rv.get_json()['error'] file_data = {'file': open(filename, 'rb')} rv = client.post(url, data=file_data) - assert rv.get_json() == exp_dict + post_exp_dict = exp_dict.copy() + post_exp_dict['url'] = ['http://localhost/i/{}.{}'.format(csm_value, ext)] + assert rv.get_json() == post_exp_dict image_dir = client.application.config.get('IMAGE_DIR') exp_dst_file = os.path.join(image_dir, csm_value[:2], '{}.{}'.format(csm_value, ext)) assert os.path.isfile(exp_dst_file) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 13c927a..79d29fa 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -14,7 +14,6 @@ import pathlib from appdirs import user_data_dir -from flask import current_app, Flask, jsonify, request from flask.cli import FlaskGroup from flask_admin import Admin, AdminIndexView from flask_sqlalchemy import SQLAlchemy @@ -25,6 +24,14 @@ import flask import numpy as np import redis +from flask import ( + current_app, + Flask, + jsonify, + request, + send_from_directory, + url_for, +) from .keypoints import compute_keypoints from .phash import triangles_from_keypoints, hash_triangles @@ -135,9 +142,16 @@ def shell_context(): # index_view=views.HomeView(name='Home', template='transformation_invariant_image_search/index.html', url='/')) # NOQA app.add_url_rule('/api/checksum', 'checksum_list', checksum_list, methods=['GET', 'POST']) app.add_url_rule('/api/image', 'image_list', image_list, methods=['GET', 'POST']) + app.add_url_rule('/i/', 'image_url', image_url) return app +def image_url(filename): + img_dir = current_app.config.get('IMAGE_DIR') + return send_from_directory( + img_dir, os.path.join(filename[:2], filename)) + + def checksum_list(): if request.method == 'POST': csm_value = request.form.get('value', None) @@ -191,7 +205,11 @@ def image_list(): shutil.move(f.name, dst_file) DB.session.add(m) DB.session.commit() - return jsonify(m.to_dict()) + dict_res = m.to_dict() + dict_res['url'] = url_for( + '.image_url', _external=True, + filename='{}.{}'.format(m.value, m.ext)), + return jsonify(dict_res) ms = DB.session.query(Checksum).filter_by(trash=False).paginate(1, 10).items return jsonify([x.to_dict() for x in ms]) diff --git a/transformation_invariant_image_search/templates/tiis/index.html b/transformation_invariant_image_search/templates/tiis/index.html index ac160d1..9d383fc 100644 --- a/transformation_invariant_image_search/templates/tiis/index.html +++ b/transformation_invariant_image_search/templates/tiis/index.html @@ -1,5 +1,41 @@ {% extends 'admin/master.html' %} {% block body %} -hello world +
+
+
+ + +
+ +
+
+
+ {% endblock %} From cb301c8cf7182f1e38044d1e95832ecf38bd4f75 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 16:50:06 +0800 Subject: [PATCH 24/40] new: dev: checksum duplicate !wip --- tests/test_main.py | 11 +++ tests/test_models.py | 13 +++ transformation_invariant_image_search/main.py | 19 ++++- .../models.py | 83 +++++++++++++++++++ 4 files changed, 122 insertions(+), 4 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 2898b1b..d6f3641 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -72,6 +72,17 @@ def test_image_post(client): assert rv.get_json() == [exp_dict] +def test_checksum_duplicate(client): + filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' + filename2 = 'fullEndToEndDemo/inputImages/cat1.png' + upload_url = '/api/image' + url = '/api/checksum/{}/duplicate' + client.post(upload_url, data={'file': open(filename1, 'rb')}) + client.post(upload_url, data={'file': open(filename2, 'rb')}) + rv = client.get(url.format(2)) + assert rv.get_json() == [] + + @pytest.mark.parametrize( 'args,word', [ diff --git a/tests/test_models.py b/tests/test_models.py index 2125daf..8a4e524 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -11,3 +11,16 @@ def test_checksum(): res = models.DB.session.query(models.Checksum).filter_by(id=1).first() assert res.value == csm_value + + +def test_get_duplicate(): + from transformation_invariant_image_search import models, main + filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' + filename2 = 'fullEndToEndDemo/inputImages/cat1.png' + app = main.create_app(db_uri='sqlite://') + with app.app_context(): + models.DB.create_all() + res = models.get_duplicate(models.DB.session, filename1) + assert res == [] + res = models.get_duplicate(models.DB.session, filename2) + # assert res != [] # TODO diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 79d29fa..6532e34 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -35,15 +35,17 @@ from .keypoints import compute_keypoints from .phash import triangles_from_keypoints, hash_triangles -from .models import DB, Checksum +from .models import ( + DB, + Checksum, + DATA_DIR, + DEFAULT_IMAGE_DIR +) from . import models __version__ = '0.0.1' -DATA_DIR = user_data_dir('transformation_invariant_image_search', 'Tom Murphy') -pathlib.Path(DATA_DIR).mkdir(parents=True, exist_ok=True) DEFAULT_DB_URI = 'sqlite:///{}'.format(os.path.join(DATA_DIR, 'tiis.db')) -DEFAULT_IMAGE_DIR = os.path.join(DATA_DIR, 'image') def phash_triangles(img, triangles, batch_size=None): @@ -141,6 +143,7 @@ def shell_context(): ) # index_view=views.HomeView(name='Home', template='transformation_invariant_image_search/index.html', url='/')) # NOQA app.add_url_rule('/api/checksum', 'checksum_list', checksum_list, methods=['GET', 'POST']) + app.add_url_rule('/api/checksum//duplicate', 'checksum_duplicate', checksum_duplicate) app.add_url_rule('/api/image', 'image_list', image_list, methods=['GET', 'POST']) app.add_url_rule('/i/', 'image_url', image_url) return app @@ -152,6 +155,14 @@ def image_url(filename): img_dir, os.path.join(filename[:2], filename)) +def checksum_duplicate(cid): + m = DB.session.query(Checksum).filter_by(id=cid).first_or_404() + if not m.triangle_phashes: + # TODO + return jsonify([]) + return jsonify([]) + + def checksum_list(): if request.method == 'POST': csm_value = request.form.get('value', None) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index 5080408..240400f 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -1,7 +1,17 @@ +import hashlib +import os +import pathlib +import shutil + +from appdirs import user_data_dir from flask import Flask from flask_sqlalchemy import SQLAlchemy +from PIL import Image DB = SQLAlchemy() +DATA_DIR = user_data_dir('transformation_invariant_image_search', 'Tom Murphy') +pathlib.Path(DATA_DIR).mkdir(parents=True, exist_ok=True) +DEFAULT_IMAGE_DIR = os.path.join(DATA_DIR, 'image') triangle_points = DB.Table( 'triangle_points', @@ -75,3 +85,76 @@ def get_or_create(session, model, **kwargs): session.add(instance) created = True return instance, created + + +def get_image_path(checksum_value, ext, img_dir=DEFAULT_IMAGE_DIR): + """Get image path. + >>> import tempfile + >>> image_fd = tempfile.mkdtemp() + >>> get_image_path( + ... '54abb6e1eb59cccf61ae356aff7e491894c5ca606dfda4240d86743424c65faf', + ... 'png', image_fd) + '.../54/54abb6e1eb59cccf61ae356aff7e491894c5ca606dfda4240d86743424c65faf.png' + """ + return os.path.join(img_dir, checksum_value[:2], '{}.{}'.format(checksum_value, ext)) + + +def get_or_create_checksum_model(session, filename, img_dir=DEFAULT_IMAGE_DIR): + """Get or create checksum model. + >>> import tempfile + >>> from . import main + >>> filename = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> image_fd = tempfile.mkdtemp() + >>> app = main.create_app(db_uri='sqlite://') + >>> app.app_context().push() + >>> DB.create_all() + >>> get_or_create_checksum_model(DB.session, filename, image_fd) + (, ...) + >>> res = get_or_create_checksum_model(DB.session, filename, image_fd) + >>> res + (, False) + >>> m = res[0] + >>> os.path.isfile(get_image_path(m.value, m.ext, image_fd)) + True + """ + pil_img = Image.open(filename) + sha256 = hashlib.sha256() + with open(filename, 'rb') as f: + for block in iter(lambda: f.read(128*1024), b''): + sha256.update(block) + sha256_csum = sha256.hexdigest() + m, created = get_or_create(session, Checksum, value=sha256_csum) + m.ext = pil_img.format.lower() + m.trash = False + dst_file = get_image_path(m.value, m.ext, img_dir) + pathlib.Path(os.path.dirname(dst_file)).mkdir(parents=True, exist_ok=True) + shutil.copy(filename, dst_file) + return m, created + + +def get_duplicate(session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR): + """Get duplicate data. + >>> import tempfile + >>> from . import main + >>> filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> filename2 = 'fullEndToEndDemo/inputImages/cat1.png' + >>> image_fd = tempfile.mkdtemp() + >>> app = main.create_app(db_uri='sqlite://') + >>> app.app_context().push() + >>> DB.create_all() + >>> get_duplicate(DB.session, filename1) + [] + >>> m = DB.session.query(Checksum).filter_by(id=1).first() + >>> len(m.triangle_phashes) + 0 + """ + if filename is None and csm_m is not None: + # TODO + raise NotImplementedError + m = get_or_create_checksum_model(session, filename, img_dir=img_dir)[0] + if m.triangle_phashes == 0: + img = cv2.imread(filename) + keypoints = compute_keypoints(img) + triangles = triangles_from_keypoints(keypoints, lower=50, upper=400) + hashes = phash_triangles(img, triangles) + return [] From fd2fd12f6baa8084fa5d4db21bf8690eaaf3f5db Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 17:21:04 +0800 Subject: [PATCH 25/40] fix: test: doctest --- transformation_invariant_image_search/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index 240400f..e41e88f 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -108,8 +108,9 @@ def get_or_create_checksum_model(session, filename, img_dir=DEFAULT_IMAGE_DIR): >>> app = main.create_app(db_uri='sqlite://') >>> app.app_context().push() >>> DB.create_all() + >>> _ = Checksum.query.delete() >>> get_or_create_checksum_model(DB.session, filename, image_fd) - (, ...) + (, True) >>> res = get_or_create_checksum_model(DB.session, filename, image_fd) >>> res (, False) From 06987de4a034040070fd6544954f45f7a1bb829a Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 20:00:39 +0800 Subject: [PATCH 26/40] new: dev: checksum duplicate !wip - doctest on phash module - set parameter for faster test --- .../models.py | 80 +++++++++++++++++-- .../phash.py | 28 ++++++- 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index e41e88f..ad8b10b 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -7,6 +7,12 @@ from flask import Flask from flask_sqlalchemy import SQLAlchemy from PIL import Image +import cv2 +import tqdm + +from .keypoints import compute_keypoints +from .phash import triangles_from_keypoints, hash_triangles, TRIANGLE_LOWER, TRIANGLE_UPPER + DB = SQLAlchemy() DATA_DIR = user_data_dir('transformation_invariant_image_search', 'Tom Murphy') @@ -21,6 +27,10 @@ 'triangle_phashes', DB.Column('triangle_phash_id', DB.Integer, DB.ForeignKey('triangle_phash.id'), primary_key=True), DB.Column('phash_id', DB.Integer, DB.ForeignKey('phash.id'), primary_key=True)) +checksum_phashes = DB.Table( + 'checksum_phashes', + DB.Column('checksum_id', DB.Integer, DB.ForeignKey('checksum.id'), primary_key=True), + DB.Column('phash_id', DB.Integer, DB.ForeignKey('phash.id'), primary_key=True)) class Base(DB.Model): @@ -32,6 +42,9 @@ class Checksum(Base): value = DB.Column(DB.String(), unique=True, nullable=False) trash = DB.Column(DB.Boolean(), default=False) ext = DB.Column(DB.String(), nullable=False) + phashes = DB.relationship('Phash', secondary=checksum_phashes, lazy='subquery', + backref=DB.backref('checksums', lazy=True)) + def __repr__(self): templ = '' @@ -133,7 +146,9 @@ def get_or_create_checksum_model(session, filename, img_dir=DEFAULT_IMAGE_DIR): return m, created -def get_duplicate(session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR): +def get_duplicate( + session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR, + triangle_lower=TRIANGLE_LOWER, triangle_upper=TRIANGLE_UPPER): """Get duplicate data. >>> import tempfile >>> from . import main @@ -143,19 +158,70 @@ def get_duplicate(session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR) >>> app = main.create_app(db_uri='sqlite://') >>> app.app_context().push() >>> DB.create_all() - >>> get_duplicate(DB.session, filename1) + >>> get_duplicate(DB.session, filename1, triangle_lower=100) [] >>> m = DB.session.query(Checksum).filter_by(id=1).first() - >>> len(m.triangle_phashes) - 0 + >>> len(m.phashes) + 15211 """ if filename is None and csm_m is not None: # TODO raise NotImplementedError m = get_or_create_checksum_model(session, filename, img_dir=img_dir)[0] - if m.triangle_phashes == 0: + if not m.triangle_phashes: img = cv2.imread(filename) keypoints = compute_keypoints(img) - triangles = triangles_from_keypoints(keypoints, lower=50, upper=400) - hashes = phash_triangles(img, triangles) + triangles = triangles_from_keypoints(keypoints, lower=triangle_lower, upper=triangle_upper) + res = [] + hash_list = [] + for triangle in tqdm.tqdm(triangles): + hashes = hash_triangles(img, [triangle]) + res.append((triangle, hashes)) + hash_list.extend(hashes) + keypoint_m_dict = {} + for item in tqdm.tqdm(set(keypoints)): + keypoint_m_dict.setdefault(item[0], {})[item[1]] = \ + get_or_create(session, Point, x=item[0], y=item[1])[0] + save_set = False + bulk_save = True + if save_set: + raise NotImplementedError + if bulk_save: + session.bulk_save_objects([ + Phash(value=item) for item in tqdm.tqdm(set(hash_list)) + ]) + m.phashes = session.query(Phash).all() + elif not bulk_save and save_set: + hashes_m_dict = {x: get_or_create(session, Phash, value=x)[0] for x in tqdm.tqdm(set(hash_list))} + else: + hash_list = [get_or_create(session, Phash, value=x)[0] for x in tqdm.tqdm(set(hash_list))] + m.phashes = hash_list + if save_set: + if bulk_save: + session.bulk_save_objects([ + Phash(value=item) for item in tqdm.tqdm(set(hash_list)) + ]) + else: + hashes_m_dict = {x: get_or_create(session, Phash, value=x)[0] for x in tqdm.tqdm(set(hash_list))} + if bulk_save: + session.bulk_save_objects([ + TrianglePhash( + checksum_id=m.id, + checksum=m, + points=[keypoint_m_dict[po[0]][po[1]] for po in item[0]], + phashes=[hashes_m_dict[ph] for ph in item[1]] + ) for item in tqdm.tqdm(res) + ]) + else: + tp_ms = [ + TrianglePhash( + checksum_id=m.id, + checksum=m, + points=[keypoint_m_dict[po[0]][po[1]] for po in item[0]], + phashes=[hashes_m_dict[ph] for ph in item[1]] + ) for item in tqdm.tqdm(res) + ] + list(map(session.add, tp_ms)) + session.add(m) + session.commit() return [] diff --git a/transformation_invariant_image_search/phash.py b/transformation_invariant_image_search/phash.py index d62681b..77a66ba 100644 --- a/transformation_invariant_image_search/phash.py +++ b/transformation_invariant_image_search/phash.py @@ -6,6 +6,8 @@ HEX_STRINGS = np.array([f'{x:02x}' for x in range(256)]) BIN_POWERS = 2 ** np.arange(8) +TRIANGLE_LOWER = 50 +TRIANGLE_UPPER = 400 def phash(image, hash_size=8, highfreq_factor=4): @@ -26,6 +28,19 @@ def hash_to_hex(a): def hash_triangles(img, triangles): + """Get hash triangles. + >>> from .keypoints import compute_keypoints + >>> filename = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> img = cv2.imread(filename) + >>> keypoints = compute_keypoints(img) + >>> triangles = triangles_from_keypoints(keypoints) + >>> res = hash_triangles(img, triangles) + >>> len(res), sorted(res)[0] + (34770, '0000563b8d730d07') + >>> res = hash_triangles(img, [triangles[0]]) + >>> len(res), sorted(res) + (3, ['709a3765dd04b0f3', 'b8dd5c4e7a352cea', 'de433036010bb391']) + """ n = len(triangles) triangles = np.asarray(triangles) @@ -51,7 +66,7 @@ def hash_triangles(img, triangles): # rotate triangles 3 times, one for each edge of the triangle rotations = (0, 1, 2), (1, 2, 0), (2, 0, 1) - for i, rotation in enumerate(tqdm.tqdm(rotations)): + for i, rotation in enumerate(rotations): p = triangles[:, rotation, :] p0 = p[:, 0] @@ -71,7 +86,8 @@ def hash_triangles(img, triangles): transform = target_points @ input_points_inverse @ transpose_m transform = transform[:, :2, :] - for k in tqdm.tqdm(range(n)): + range_list = tqdm.tqdm(range_list) if len(range_list) > 1 else range(n) + for k in range_list: image = cv2.warpAffine(img, transform[k], size) # calculate dct for perceptual hash @@ -89,7 +105,7 @@ def hash_triangles(img, triangles): return hash_to_hex(hashes) -def triangles_from_keypoints(keypoints, lower=50, upper=400): +def triangles_from_keypoints(keypoints, lower=TRIANGLE_LOWER, upper=TRIANGLE_UPPER): """Get Triangles from keypoints. >>> from .keypoints import compute_keypoints @@ -101,6 +117,12 @@ def triangles_from_keypoints(keypoints, lower=50, upper=400): 11590 >>> print(list(map(lambda x: x.tolist(), res[0]))) [[162.0, 203.0], [261.0, 76.0], [131.0, 63.0]] + >>> res2 = triangles_from_keypoints(keypoints, lower=10) + >>> len(res2) + 14238 + >>> res3 = triangles_from_keypoints(keypoints, upper=100) + >>> len(res3) + 315 """ keypoints = np.asarray(keypoints, dtype=float) From 8a44f0b8c346145f9378e65bc4848733600f2161 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 20:58:19 +0800 Subject: [PATCH 27/40] fix: dev: range list --- transformation_invariant_image_search/phash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformation_invariant_image_search/phash.py b/transformation_invariant_image_search/phash.py index 77a66ba..811854e 100644 --- a/transformation_invariant_image_search/phash.py +++ b/transformation_invariant_image_search/phash.py @@ -86,7 +86,7 @@ def hash_triangles(img, triangles): transform = target_points @ input_points_inverse @ transpose_m transform = transform[:, :2, :] - range_list = tqdm.tqdm(range_list) if len(range_list) > 1 else range(n) + range_list = tqdm.tqdm(range(n)) if len(range(n)) > 1 else range(n) for k in range_list: image = cv2.warpAffine(img, transform[k], size) From 7717f4ef8be0522d2be0155d9fb37d6ce789c373 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Wed, 6 Feb 2019 21:03:04 +0800 Subject: [PATCH 28/40] chg: dev: remove unused test --- tests/test_models.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 8a4e524..2125daf 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -11,16 +11,3 @@ def test_checksum(): res = models.DB.session.query(models.Checksum).filter_by(id=1).first() assert res.value == csm_value - - -def test_get_duplicate(): - from transformation_invariant_image_search import models, main - filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' - filename2 = 'fullEndToEndDemo/inputImages/cat1.png' - app = main.create_app(db_uri='sqlite://') - with app.app_context(): - models.DB.create_all() - res = models.get_duplicate(models.DB.session, filename1) - assert res == [] - res = models.get_duplicate(models.DB.session, filename2) - # assert res != [] # TODO From 25e9781055162936033989fc5c0bb0986e6e9f56 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 11:18:53 +0800 Subject: [PATCH 29/40] new: dev: checksum duplicate --- .../models.py | 91 ++++++++----------- 1 file changed, 38 insertions(+), 53 deletions(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index ad8b10b..a556ae1 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -1,3 +1,4 @@ +from itertools import zip_longest import hashlib import os import pathlib @@ -146,8 +147,19 @@ def get_or_create_checksum_model(session, filename, img_dir=DEFAULT_IMAGE_DIR): return m, created +def grouper(iterable, n, fillvalue=None): + """Collect data into fixed-length chunks or blocks. + taken from: + https://docs.python.org/3/library/itertools.html#itertools.zip_longest + >>> list(grouper('ABCDEFG', 3, 'x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + """ + args = [iter(iterable)] * n + return zip_longest(*args, fillvalue=fillvalue) + + def get_duplicate( - session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR, + session, filename, img_dir=DEFAULT_IMAGE_DIR, triangle_lower=TRIANGLE_LOWER, triangle_upper=TRIANGLE_UPPER): """Get duplicate data. >>> import tempfile @@ -158,70 +170,43 @@ def get_duplicate( >>> app = main.create_app(db_uri='sqlite://') >>> app.app_context().push() >>> DB.create_all() - >>> get_duplicate(DB.session, filename1, triangle_lower=100) + >>> triangle_lower = 100 + >>> get_duplicate(DB.session, filename1, triangle_lower=triangle_lower) [] >>> m = DB.session.query(Checksum).filter_by(id=1).first() >>> len(m.phashes) 15211 + >>> get_duplicate(DB.session, filename2, triangle_lower=triangle_lower) + [] """ - if filename is None and csm_m is not None: - # TODO - raise NotImplementedError - m = get_or_create_checksum_model(session, filename, img_dir=img_dir)[0] + res = [] + m, created = get_or_create_checksum_model(session, filename, img_dir=img_dir) + if created: + session.add(m) + session.commit() if not m.triangle_phashes: img = cv2.imread(filename) keypoints = compute_keypoints(img) triangles = triangles_from_keypoints(keypoints, lower=triangle_lower, upper=triangle_upper) - res = [] hash_list = [] for triangle in tqdm.tqdm(triangles): hashes = hash_triangles(img, [triangle]) - res.append((triangle, hashes)) hash_list.extend(hashes) - keypoint_m_dict = {} - for item in tqdm.tqdm(set(keypoints)): - keypoint_m_dict.setdefault(item[0], {})[item[1]] = \ - get_or_create(session, Point, x=item[0], y=item[1])[0] - save_set = False - bulk_save = True - if save_set: - raise NotImplementedError - if bulk_save: - session.bulk_save_objects([ - Phash(value=item) for item in tqdm.tqdm(set(hash_list)) - ]) - m.phashes = session.query(Phash).all() - elif not bulk_save and save_set: - hashes_m_dict = {x: get_or_create(session, Phash, value=x)[0] for x in tqdm.tqdm(set(hash_list))} - else: - hash_list = [get_or_create(session, Phash, value=x)[0] for x in tqdm.tqdm(set(hash_list))] - m.phashes = hash_list - if save_set: - if bulk_save: - session.bulk_save_objects([ - Phash(value=item) for item in tqdm.tqdm(set(hash_list)) - ]) - else: - hashes_m_dict = {x: get_or_create(session, Phash, value=x)[0] for x in tqdm.tqdm(set(hash_list))} - if bulk_save: - session.bulk_save_objects([ - TrianglePhash( - checksum_id=m.id, - checksum=m, - points=[keypoint_m_dict[po[0]][po[1]] for po in item[0]], - phashes=[hashes_m_dict[ph] for ph in item[1]] - ) for item in tqdm.tqdm(res) - ]) - else: - tp_ms = [ - TrianglePhash( - checksum_id=m.id, - checksum=m, - points=[keypoint_m_dict[po[0]][po[1]] for po in item[0]], - phashes=[hashes_m_dict[ph] for ph in item[1]] - ) for item in tqdm.tqdm(res) - ] - list(map(session.add, tp_ms)) + hash_list = set(hash_list) # deduplicate hash_list + hash_list_ms = session.query(Phash).filter(Phash.value.in_(hash_list)).all() + hash_list_ms_values = [x.value for x in hash_list_ms] + not_in_db_hash_list = [x for x in hash_list if x not in hash_list_ms_values] + if not_in_db_hash_list: + for hash_group in tqdm.tqdm(grouper(not_in_db_hash_list, 100)): + session.add_all( + [Phash(value=i) for i in hash_group if i]) + session.flush + session.commit() + hash_list_ms = session.query(Phash).filter(Phash.value.in_(hash_list)).all() + m.phashes = hash_list_ms session.add(m) session.commit() - return [] + if session.query(Checksum).count() > 1: + res = session.query(Checksum).filter(Checksum.phashes.any(Phash.value.in_(hash_list))) \ + .filter(Checksum.value != m.value).all() + return res From 9193f2f0aecbcf23636c578cf137b13cffd07756 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 11:34:47 +0800 Subject: [PATCH 30/40] new: dev: get_duplicate from model instead from filename --- transformation_invariant_image_search/models.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index a556ae1..368d26b 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -159,7 +159,7 @@ def grouper(iterable, n, fillvalue=None): def get_duplicate( - session, filename, img_dir=DEFAULT_IMAGE_DIR, + session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR, triangle_lower=TRIANGLE_LOWER, triangle_upper=TRIANGLE_UPPER): """Get duplicate data. >>> import tempfile @@ -171,20 +171,29 @@ def get_duplicate( >>> app.app_context().push() >>> DB.create_all() >>> triangle_lower = 100 + >>> # Get duplicate from image filename >>> get_duplicate(DB.session, filename1, triangle_lower=triangle_lower) [] + >>> # Get duplicate from checksum model >>> m = DB.session.query(Checksum).filter_by(id=1).first() - >>> len(m.phashes) + >>> get_duplicate(DB.session, csm_m=m, triangle_lower=triangle_lower) + [] + >>> len(m.phashes) # count phash 15211 >>> get_duplicate(DB.session, filename2, triangle_lower=triangle_lower) [] """ + if csm_m is not None and filename is not None: + raise ValueError('Only either checksum model or filename is required') + if csm_m: + m, created = csm_m, False + else: + m, created = get_or_create_checksum_model(session, filename, img_dir=img_dir) res = [] - m, created = get_or_create_checksum_model(session, filename, img_dir=img_dir) if created: session.add(m) session.commit() - if not m.triangle_phashes: + if not m.phashes: img = cv2.imread(filename) keypoints = compute_keypoints(img) triangles = triangles_from_keypoints(keypoints, lower=triangle_lower, upper=triangle_upper) From b08561e2678701a354d80c147cc900f9aae97c0c Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 13:18:01 +0800 Subject: [PATCH 31/40] fix: dev: get duplicate with checksum model --- .../models.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index 368d26b..1b8b56e 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -171,17 +171,26 @@ def get_duplicate( >>> app.app_context().push() >>> DB.create_all() >>> triangle_lower = 100 + >>> triangle_upper = 300 >>> # Get duplicate from image filename - >>> get_duplicate(DB.session, filename1, triangle_lower=triangle_lower) + >>> get_duplicate( + ... DB.session, filename1, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) [] >>> # Get duplicate from checksum model >>> m = DB.session.query(Checksum).filter_by(id=1).first() - >>> get_duplicate(DB.session, csm_m=m, triangle_lower=triangle_lower) + >>> get_duplicate( + ... DB.session, csm_m=m, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) [] - >>> len(m.phashes) # count phash - 15211 - >>> get_duplicate(DB.session, filename2, triangle_lower=triangle_lower) + >>> len(m.phashes) > 0 + True + >>> get_duplicate( + ... DB.session, filename2, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) [] + >>> get_duplicate(DB.session, csm_m=m, triangle_lower=triangle_lower) + [] """ if csm_m is not None and filename is not None: raise ValueError('Only either checksum model or filename is required') @@ -193,8 +202,12 @@ def get_duplicate( if created: session.add(m) session.commit() + hash_list = None if not m.phashes: - img = cv2.imread(filename) + if filename: + img = cv2.imread(filename) + else: + img = cv2.imread(get_image_path(m.value, m.ext, img_dir)) keypoints = compute_keypoints(img) triangles = triangles_from_keypoints(keypoints, lower=triangle_lower, upper=triangle_upper) hash_list = [] @@ -216,6 +229,8 @@ def get_duplicate( session.add(m) session.commit() if session.query(Checksum).count() > 1: + if hash_list is None: + hash_list = [x.value for x in m.phashes] res = session.query(Checksum).filter(Checksum.phashes.any(Phash.value.in_(hash_list))) \ .filter(Checksum.value != m.value).all() return res From 2f3ede2fc601441de84ce20c68358b7bcf111f79 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 15:40:46 +0800 Subject: [PATCH 32/40] chg: dev: models - speed up duplicate search - fix typo - commit on each loop - increase batch size - test on unique image --- .../models.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index 1b8b56e..74d77b8 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -69,7 +69,7 @@ class Phash(Base): value = DB.Column(DB.String(), unique=True, nullable=False) def __repr__(self): - templ = '' + templ = '' return templ.format(self) @@ -166,6 +166,7 @@ def get_duplicate( >>> from . import main >>> filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' >>> filename2 = 'fullEndToEndDemo/inputImages/cat1.png' + >>> filename3 = 'fullEndToEndDemo/inputImages/mona.jpg' >>> image_fd = tempfile.mkdtemp() >>> app = main.create_app(db_uri='sqlite://') >>> app.app_context().push() @@ -191,6 +192,10 @@ def get_duplicate( [] >>> get_duplicate(DB.session, csm_m=m, triangle_lower=triangle_lower) [] + >>> get_duplicate( + ... DB.session, filename3, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) + [] """ if csm_m is not None and filename is not None: raise ValueError('Only either checksum model or filename is required') @@ -219,18 +224,18 @@ def get_duplicate( hash_list_ms_values = [x.value for x in hash_list_ms] not_in_db_hash_list = [x for x in hash_list if x not in hash_list_ms_values] if not_in_db_hash_list: - for hash_group in tqdm.tqdm(grouper(not_in_db_hash_list, 100)): + for hash_group in tqdm.tqdm(list(grouper(not_in_db_hash_list, 1000))): session.add_all( [Phash(value=i) for i in hash_group if i]) - session.flush - session.commit() + session.flush + session.commit() hash_list_ms = session.query(Phash).filter(Phash.value.in_(hash_list)).all() m.phashes = hash_list_ms session.add(m) session.commit() if session.query(Checksum).count() > 1: - if hash_list is None: - hash_list = [x.value for x in m.phashes] - res = session.query(Checksum).filter(Checksum.phashes.any(Phash.value.in_(hash_list))) \ - .filter(Checksum.value != m.value).all() + res = session.query(Checksum).join(Phash.checksums) \ + .distinct(Checksum.id) \ + .filter(Phash.checksums.any(Checksum.value == m.value)) \ + .filter(Checksum.id != m.id).all() return res From f3ac86ccc9441afd7f594cc79403711d82861341 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 16:53:20 +0800 Subject: [PATCH 33/40] new: dev: interface - fix favicon - js for getting duplicate --- transformation_invariant_image_search/main.py | 15 ++++-- .../templates/tiis/index.html | 46 +++++++++++++++++-- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 6532e34..682807a 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -157,10 +157,17 @@ def image_url(filename): def checksum_duplicate(cid): m = DB.session.query(Checksum).filter_by(id=cid).first_or_404() - if not m.triangle_phashes: - # TODO - return jsonify([]) - return jsonify([]) + res = models.get_duplicate( + DB.session, csm_m=m, triangle_lower=100, triangle_upper=300 + ) + dict_list = [x.to_dict() for x in res] + list(map( + lambda x: x.update({'url': url_for( + '.image_url', _external=True, + filename='{}.{}'.format(x['value'], x['ext']))}), + dict_list + )) + return jsonify(dict_list) def checksum_list(): diff --git a/transformation_invariant_image_search/templates/tiis/index.html b/transformation_invariant_image_search/templates/tiis/index.html index 9d383fc..e2eac28 100644 --- a/transformation_invariant_image_search/templates/tiis/index.html +++ b/transformation_invariant_image_search/templates/tiis/index.html @@ -1,22 +1,37 @@ {% extends 'admin/master.html' %} +{% block head %} +{{ super() }} + +{% endblock %} + {% block body %}
-
+
- +
+ +
+ +
-
+
+
+ +
{% endblock %} From 5415b02805a7d257c2f608ae2d891d0dd20b4ed7 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 17:00:53 +0800 Subject: [PATCH 34/40] chg: dev: move get_duplicate func to main module --- transformation_invariant_image_search/main.py | 99 ++++++++++++++++++- .../models.py | 83 ---------------- 2 files changed, 97 insertions(+), 85 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 682807a..0351a45 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -24,6 +24,7 @@ import flask import numpy as np import redis +import tqdm from flask import ( current_app, Flask, @@ -33,15 +34,20 @@ url_for, ) +from . import models from .keypoints import compute_keypoints -from .phash import triangles_from_keypoints, hash_triangles from .models import ( DB, Checksum, DATA_DIR, DEFAULT_IMAGE_DIR ) -from . import models +from .phash import ( + triangles_from_keypoints, + hash_triangles, + TRIANGLE_LOWER, + TRIANGLE_UPPER, +) __version__ = '0.0.1' @@ -77,6 +83,95 @@ def phash_triangles(img, triangles, batch_size=None): return results +def get_duplicate( + session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR, + triangle_lower=TRIANGLE_LOWER, triangle_upper=TRIANGLE_UPPER): + """Get duplicate data. + >>> import tempfile + >>> from . import main + >>> filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' + >>> filename2 = 'fullEndToEndDemo/inputImages/cat1.png' + >>> filename3 = 'fullEndToEndDemo/inputImages/mona.jpg' + >>> image_fd = tempfile.mkdtemp() + >>> app = main.create_app(db_uri='sqlite://') + >>> app.app_context().push() + >>> DB.create_all() + >>> triangle_lower = 100 + >>> triangle_upper = 300 + >>> # Get duplicate from image filename + >>> get_duplicate( + ... DB.session, filename1, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) + [] + >>> # Get duplicate from checksum model + >>> m = DB.session.query(Checksum).filter_by(id=1).first() + >>> get_duplicate( + ... DB.session, csm_m=m, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) + [] + >>> len(m.phashes) > 0 + True + >>> get_duplicate( + ... DB.session, filename2, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) + [] + >>> get_duplicate(DB.session, csm_m=m, triangle_lower=triangle_lower) + [] + >>> get_duplicate( + ... DB.session, filename3, + ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) + [] + """ + if csm_m is not None and filename is not None: + raise ValueError('Only either checksum model or filename is required') + if csm_m: + m, created = csm_m, False + else: + m, created = models.get_or_create_checksum_model( + session, filename, img_dir=img_dir) + res = [] + if created: + session.add(m) + session.commit() + hash_list = None + if not m.phashes: + if filename: + img = cv2.imread(filename) + else: + img = cv2.imread(get_image_path(m.value, m.ext, img_dir)) + keypoints = compute_keypoints(img) + triangles = triangles_from_keypoints( + keypoints, lower=triangle_lower, upper=triangle_upper) + hash_list = [] + for triangle in tqdm.tqdm(triangles): + hashes = hash_triangles(img, [triangle]) + hash_list.extend(hashes) + hash_list = set(hash_list) # deduplicate hash_list + hash_list_ms = session.query(models.Phash) \ + .filter(models.Phash.value.in_(hash_list)).all() + hash_list_ms_values = [x.value for x in hash_list_ms] + not_in_db_hash_list = \ + [x for x in hash_list if x not in hash_list_ms_values] + if not_in_db_hash_list: + for hash_group in tqdm.tqdm( + list(models.grouper(not_in_db_hash_list, 1000))): + session.add_all( + [models.Phash(value=i) for i in hash_group if i]) + session.flush + session.commit() + hash_list_ms = session.query(models.Phash) \ + .filter(models.Phash.value.in_(hash_list)).all() + m.phashes = hash_list_ms + session.add(m) + session.commit() + if session.query(Checksum).count() > 1: + res = session.query(Checksum).join(models.Phash.checksums) \ + .distinct(Checksum.id) \ + .filter(models.Phash.checksums.any(Checksum.value == m.value)) \ + .filter(Checksum.id != m.id).all() + return res + + def pipeline(r, data, chunk_size): npartitions = len(data) // chunk_size pipe = r.pipeline() diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index 74d77b8..ac3ec68 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -156,86 +156,3 @@ def grouper(iterable, n, fillvalue=None): """ args = [iter(iterable)] * n return zip_longest(*args, fillvalue=fillvalue) - - -def get_duplicate( - session, filename=None, csm_m=None, img_dir=DEFAULT_IMAGE_DIR, - triangle_lower=TRIANGLE_LOWER, triangle_upper=TRIANGLE_UPPER): - """Get duplicate data. - >>> import tempfile - >>> from . import main - >>> filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' - >>> filename2 = 'fullEndToEndDemo/inputImages/cat1.png' - >>> filename3 = 'fullEndToEndDemo/inputImages/mona.jpg' - >>> image_fd = tempfile.mkdtemp() - >>> app = main.create_app(db_uri='sqlite://') - >>> app.app_context().push() - >>> DB.create_all() - >>> triangle_lower = 100 - >>> triangle_upper = 300 - >>> # Get duplicate from image filename - >>> get_duplicate( - ... DB.session, filename1, - ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) - [] - >>> # Get duplicate from checksum model - >>> m = DB.session.query(Checksum).filter_by(id=1).first() - >>> get_duplicate( - ... DB.session, csm_m=m, - ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) - [] - >>> len(m.phashes) > 0 - True - >>> get_duplicate( - ... DB.session, filename2, - ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) - [] - >>> get_duplicate(DB.session, csm_m=m, triangle_lower=triangle_lower) - [] - >>> get_duplicate( - ... DB.session, filename3, - ... triangle_lower=triangle_lower, triangle_upper=triangle_upper) - [] - """ - if csm_m is not None and filename is not None: - raise ValueError('Only either checksum model or filename is required') - if csm_m: - m, created = csm_m, False - else: - m, created = get_or_create_checksum_model(session, filename, img_dir=img_dir) - res = [] - if created: - session.add(m) - session.commit() - hash_list = None - if not m.phashes: - if filename: - img = cv2.imread(filename) - else: - img = cv2.imread(get_image_path(m.value, m.ext, img_dir)) - keypoints = compute_keypoints(img) - triangles = triangles_from_keypoints(keypoints, lower=triangle_lower, upper=triangle_upper) - hash_list = [] - for triangle in tqdm.tqdm(triangles): - hashes = hash_triangles(img, [triangle]) - hash_list.extend(hashes) - hash_list = set(hash_list) # deduplicate hash_list - hash_list_ms = session.query(Phash).filter(Phash.value.in_(hash_list)).all() - hash_list_ms_values = [x.value for x in hash_list_ms] - not_in_db_hash_list = [x for x in hash_list if x not in hash_list_ms_values] - if not_in_db_hash_list: - for hash_group in tqdm.tqdm(list(grouper(not_in_db_hash_list, 1000))): - session.add_all( - [Phash(value=i) for i in hash_group if i]) - session.flush - session.commit() - hash_list_ms = session.query(Phash).filter(Phash.value.in_(hash_list)).all() - m.phashes = hash_list_ms - session.add(m) - session.commit() - if session.query(Checksum).count() > 1: - res = session.query(Checksum).join(Phash.checksums) \ - .distinct(Checksum.id) \ - .filter(Phash.checksums.any(Checksum.value == m.value)) \ - .filter(Checksum.id != m.id).all() - return res From 60bae3d8a21f57e1b67cf9ca4e4773a636f8a7cc Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 17:04:28 +0800 Subject: [PATCH 35/40] chg: dev: use phash_triangles (faster) --- transformation_invariant_image_search/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 0351a45..2f67b5a 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -142,11 +142,7 @@ def get_duplicate( keypoints = compute_keypoints(img) triangles = triangles_from_keypoints( keypoints, lower=triangle_lower, upper=triangle_upper) - hash_list = [] - for triangle in tqdm.tqdm(triangles): - hashes = hash_triangles(img, [triangle]) - hash_list.extend(hashes) - hash_list = set(hash_list) # deduplicate hash_list + hash_list = set(phash_triangles(img, triangles)) hash_list_ms = session.query(models.Phash) \ .filter(models.Phash.value.in_(hash_list)).all() hash_list_ms_values = [x.value for x in hash_list_ms] From 1b4699718da95eb07484a2d9b39c609b9f3d1cd6 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 17:06:31 +0800 Subject: [PATCH 36/40] fix: dev: json api for duplicate endpoint --- transformation_invariant_image_search/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 2f67b5a..71869f9 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -138,7 +138,8 @@ def get_duplicate( if filename: img = cv2.imread(filename) else: - img = cv2.imread(get_image_path(m.value, m.ext, img_dir)) + img = cv2.imread( + models.get_image_path(m.value, m.ext, img_dir)) keypoints = compute_keypoints(img) triangles = triangles_from_keypoints( keypoints, lower=triangle_lower, upper=triangle_upper) @@ -248,7 +249,7 @@ def image_url(filename): def checksum_duplicate(cid): m = DB.session.query(Checksum).filter_by(id=cid).first_or_404() - res = models.get_duplicate( + res = get_duplicate( DB.session, csm_m=m, triangle_lower=100, triangle_upper=300 ) dict_list = [x.to_dict() for x in res] From 7a3ac616ef817eca67fb8ab8d60fff2c48dbcb7f Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 18:53:33 +0800 Subject: [PATCH 37/40] fix: dev: get_duplicate - split hash to batch - simple logging --- transformation_invariant_image_search/main.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index 71869f9..d9d8c17 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -12,6 +12,7 @@ import sys import tempfile import pathlib +import logging from appdirs import user_data_dir from flask.cli import FlaskGroup @@ -144,21 +145,33 @@ def get_duplicate( triangles = triangles_from_keypoints( keypoints, lower=triangle_lower, upper=triangle_upper) hash_list = set(phash_triangles(img, triangles)) - hash_list_ms = session.query(models.Phash) \ - .filter(models.Phash.value.in_(hash_list)).all() + hash_list_ms = [] + logging.debug('getting existing phash on db') + for hash_group in tqdm.tqdm( + list(models.grouper(hash_list, 999))): + hash_list_ms.extend( + session.query(models.Phash) + .filter(models.Phash.value.in_(filter(lambda x: x, hash_group))) + .all()) hash_list_ms_values = [x.value for x in hash_list_ms] not_in_db_hash_list = \ [x for x in hash_list if x not in hash_list_ms_values] if not_in_db_hash_list: + logging.debug('insert phash') for hash_group in tqdm.tqdm( list(models.grouper(not_in_db_hash_list, 1000))): session.add_all( [models.Phash(value=i) for i in hash_group if i]) session.flush session.commit() - hash_list_ms = session.query(models.Phash) \ - .filter(models.Phash.value.in_(hash_list)).all() - m.phashes = hash_list_ms + logging.debug('getting rest of phash') + for hash_group in tqdm.tqdm( + list(models.grouper(not_in_db_hash_list, 999))): + hash_list_ms.extend( + session.query(models.Phash) \ + .filter(models.Phash.value.in_(filter(lambda x: x, hash_group))) \ + .all()) + m.phashes.extend(hash_list_ms) session.add(m) session.commit() if session.query(Checksum).count() > 1: @@ -250,8 +263,7 @@ def image_url(filename): def checksum_duplicate(cid): m = DB.session.query(Checksum).filter_by(id=cid).first_or_404() res = get_duplicate( - DB.session, csm_m=m, triangle_lower=100, triangle_upper=300 - ) + DB.session, csm_m=m, triangle_lower=100, triangle_upper=300) dict_list = [x.to_dict() for x in res] list(map( lambda x: x.update({'url': url_for( From ba069dc21b6eb67cf93df64d4e954bb770d701d1 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 19:14:14 +0800 Subject: [PATCH 38/40] fix: test: upload api --- tests/test_main.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index d6f3641..b7980d3 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -72,15 +72,15 @@ def test_image_post(client): assert rv.get_json() == [exp_dict] -def test_checksum_duplicate(client): - filename1 = 'fullEndToEndDemo/inputImages/cat_original.png' - filename2 = 'fullEndToEndDemo/inputImages/cat1.png' +def test_upload_api(client): + filename = 'fullEndToEndDemo/inputImages/cat1.png' upload_url = '/api/image' - url = '/api/checksum/{}/duplicate' - client.post(upload_url, data={'file': open(filename1, 'rb')}) - client.post(upload_url, data={'file': open(filename2, 'rb')}) - rv = client.get(url.format(2)) - assert rv.get_json() == [] + rv = client.post(upload_url, data={'file': open(filename, 'rb')}) + assert rv.get_json() == { + 'ext': 'png', 'id': 1, 'trash': False, + 'url': ['http://localhost/i/4aba099f752d609aad2ed4c28f972ae96d02ad2579d0dd3f16b1ac29a88caf6d.png'], + 'value': '4aba099f752d609aad2ed4c28f972ae96d02ad2579d0dd3f16b1ac29a88caf6d' + } @pytest.mark.parametrize( From db47c7dfcb51eeeae58d9fc735b761a1b11e72b9 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Thu, 7 Feb 2019 19:19:16 +0800 Subject: [PATCH 39/40] chg: dev: remove unused table --- .../models.py | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/transformation_invariant_image_search/models.py b/transformation_invariant_image_search/models.py index ac3ec68..0fe58d9 100644 --- a/transformation_invariant_image_search/models.py +++ b/transformation_invariant_image_search/models.py @@ -20,14 +20,6 @@ pathlib.Path(DATA_DIR).mkdir(parents=True, exist_ok=True) DEFAULT_IMAGE_DIR = os.path.join(DATA_DIR, 'image') -triangle_points = DB.Table( - 'triangle_points', - DB.Column('triangle_phash_id', DB.Integer, DB.ForeignKey('triangle_phash.id'), primary_key=True), - DB.Column('point_id', DB.Integer, DB.ForeignKey('point.id'), primary_key=True)) -triangle_phashes = DB.Table( - 'triangle_phashes', - DB.Column('triangle_phash_id', DB.Integer, DB.ForeignKey('triangle_phash.id'), primary_key=True), - DB.Column('phash_id', DB.Integer, DB.ForeignKey('phash.id'), primary_key=True)) checksum_phashes = DB.Table( 'checksum_phashes', DB.Column('checksum_id', DB.Integer, DB.ForeignKey('checksum.id'), primary_key=True), @@ -56,15 +48,6 @@ def to_dict(self): return {k: getattr(self, k) for k in keys} -class Point(Base): - x = DB.Column(DB.Integer(), nullable=False) - y = DB.Column(DB.Integer(), nullable=False) - - def __repr__(self): - templ = '' - return templ.format(self) - - class Phash(Base): value = DB.Column(DB.String(), unique=True, nullable=False) @@ -73,23 +56,6 @@ def __repr__(self): return templ.format(self) -class TrianglePhash(Base): - checksum_id = DB.Column(DB.Integer, DB.ForeignKey('checksum.id'), nullable=False) - checksum = DB.relationship('Checksum', backref='triangle_phashes', lazy=True) - points = DB.relationship('Point', secondary=triangle_points, lazy='subquery', - backref=DB.backref('triangle_phashes', lazy=True)) - phashes = DB.relationship('Phash', secondary=triangle_phashes, lazy='subquery', - backref=DB.backref('triangle_phashes', lazy=True)) - - def __repr__(self): - templ = '' - return templ.format( - self, - ','.join(['({0.x, 0.y})'.format(x) for x in Point]), - ','.join(['{0.value}'.format(x) for x in Point]), - ) - - def get_or_create(session, model, **kwargs): """Creates an object or returns the object if exists.""" instance = session.query(model).filter_by(**kwargs).first() From 78998eee955f9691e72179013eaac34605c5f0e6 Mon Sep 17 00:00:00 2001 From: rachmadaniHaryono Date: Sun, 10 Feb 2019 18:43:55 +0800 Subject: [PATCH 40/40] fix: dev: similar func --- transformation_invariant_image_search/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/transformation_invariant_image_search/main.py b/transformation_invariant_image_search/main.py index d9d8c17..e5b8b00 100644 --- a/transformation_invariant_image_search/main.py +++ b/transformation_invariant_image_search/main.py @@ -190,7 +190,7 @@ def pipeline(r, data, chunk_size): yield pipe, chunk -def insert(chunks, filename): +def insert_(chunks, filename): n = 0 for pipe, keys in chunks: @@ -202,7 +202,7 @@ def insert(chunks, filename): print(f'added {n} fragments for {filename}') -def lookup(chunks, filename): +def lookup_(chunks, filename): count = Counter() for pipe, keys in chunks: @@ -389,7 +389,7 @@ def lookup(image): def main(command, filenames): - command = insert if command == 'insert' else lookup + command = insert_ if command == 'insert' else lookup_ r = redis.StrictRedis(host='localhost', port=6379, db=0) try: