Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.

Commit b600a18

Browse files
committed
Merge pull request #247 from wking/search-index
Flesh out repository deletion and add basic search indexing
2 parents 6732a5f + a8bd4a4 commit b600a18

File tree

10 files changed

+292
-8
lines changed

10 files changed

+292
-8
lines changed

README.md

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ environment: `export SETTINGS_FLAVOR=dev`
3636
The default environment is `dev`.
3737

3838
NOTE: it's possible to load environment variables from the config file
39-
with a simple syntax: _env:VARIABLENAME. Check this syntax in action
40-
in the example below...
39+
with a simple syntax: `_env:VARIABLENAME[:DEFAULT]`. Check this syntax
40+
in action in the example below...
4141

4242

4343
#### Example config
@@ -46,6 +46,9 @@ in the example below...
4646

4747
common:
4848
loglevel: info
49+
search_backend: "_env:SEARCH_BACKEND:"
50+
sqlalchemy_index_database:
51+
"_env:SQLALCHEMY_INDEX_DATABASE:sqlite:////tmp/docker-registry.db"
4952

5053
prod:
5154
loglevel: warn
@@ -208,6 +211,36 @@ dev:
208211
gs_secure: false
209212
```
210213

214+
### Search-engine options
215+
216+
The Docker Registry can optionally index repository information in a
217+
database for the `GET /v1/search` [endpoint][search-endpoint]. You
218+
can configure the backend with a configuration like:
219+
220+
```yaml
221+
search_backend: "_env:SEARCH_BACKEND:"
222+
```
223+
224+
The `search_backend` setting selects the search backend to use. If
225+
`search_backend` is empty, no index is built, and the search endpoint
226+
always returns empty results. Currently supported backends and their
227+
backend-specific configuration options are:
228+
229+
* `sqlalchemy`: Use [SQLAlchemy][].
230+
* The backing database is selected with
231+
`sqlalchemy_index_database`, which is passed through to
232+
[create_engine][].
233+
234+
If `search_backend` is neither empty nor one of the above backends, it
235+
should point to a module:
236+
237+
```yaml
238+
search_backend: foo.registry.index.xapian
239+
```
240+
241+
In this case, the module is imported, and an instance of its `Index`
242+
class is used as the search backend.
243+
211244
### Email options
212245

213246
Setting these options makes the Registry send an email on each code Exception:
@@ -408,3 +441,9 @@ pip install tox
408441
cd docker-registry/
409442
tox
410443
```
444+
445+
[search-endpoint]:
446+
http://docs.docker.io/en/latest/reference/api/index_api/#get--v1-search
447+
[SQLAlchemy]: http://docs.sqlalchemy.org/
448+
[create_engine]:
449+
http://docs.sqlalchemy.org/en/latest/core/engines.html#sqlalchemy.create_engine

config/config_sample.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ common:
3131
# Let gunicorn set this environment variable or set a random string here
3232
secret_key: _env:SECRET_KEY
3333

34+
search_backend: "_env:SEARCH_BACKEND:"
35+
sqlalchemy_index_database:
36+
"_env:SQLALCHEMY_INDEX_DATABASE:sqlite:////tmp/docker-registry.db"
37+
3438

3539
# This is the default configuration when no flavor is specified
3640
dev:

config/config_test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ test:
1515
gs_access_key: _env:GS_ACCESS_KEY
1616
gs_secret_key: _env:GS_SECRET_KEY
1717
gs_secure: false
18+
19+
search_backend: sqlalchemy
20+
sqlalchemy_index_database: sqlite:////tmp/docker-registry.db

lib/index/__init__.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""Index backends for the search endpoint
2+
"""
3+
4+
import importlib
5+
import signals
6+
7+
import config
8+
9+
10+
__all__ = ['load']
11+
12+
13+
class Index (object):
    """A backend for the search endpoint

    Subclasses can use ._walk_storage to generate an initial index,
    override ._handle_repository_* to stay up to date with registry
    changes, and must override .results to respond to queries.
    """
    def __init__(self):
        # Connect the (possibly overridden) handlers to the repository
        # signals.  The handlers defined on this base class do nothing.
        signals.repository_created.connect(self._handle_repository_created)
        signals.repository_updated.connect(self._handle_repository_updated)
        signals.repository_deleted.connect(self._handle_repository_deleted)

    def _walk_storage(self, store):
        """Iterate through repositories in storage

        This helper is useful for building an initial database for
        your search index.  Yields dictionaries:

          {'name': name, 'description': description}

        The store must provide a .repositories path and a
        .list_directory(path=...) method.  A directory whose listing
        raises OSError is treated as empty.
        """
        try:
            namespace_paths = list(
                store.list_directory(path=store.repositories))
        except OSError:
            namespace_paths = []
        for namespace_path in namespace_paths:
            # The last path component is the namespace name.
            namespace = namespace_path.rsplit('/', 1)[-1]
            try:
                repository_paths = list(
                    store.list_directory(path=namespace_path))
            except OSError:
                repository_paths = []
            for path in repository_paths:
                repository = path.rsplit('/', 1)[-1]
                name = '{0}/{1}'.format(namespace, repository)
                description = None  # TODO(wking): store descriptions
                yield {'name': name, 'description': description}

    def _handle_repository_created(
            self, sender, namespace, repository, value):
        """Signal handler for repository creation (no-op by default)"""
        pass

    def _handle_repository_updated(
            self, sender, namespace, repository, value):
        """Signal handler for repository updates (no-op by default)"""
        pass

    def _handle_repository_deleted(self, sender, namespace, repository):
        """Signal handler for repository deletion (no-op by default)"""
        pass

    def results(self, search_term):
        """Return a list of results matching search_term

        The list elements should be dictionaries:

          {'name': name, 'description': description}

        Raises NotImplementedError unless overridden by a subclass.
        """
        # Explicit field index, matching the other format strings in
        # this module (auto-numbered '{}' fields fail on Python 2.6).
        raise NotImplementedError('results method for {0!r}'.format(self))
70+
71+
72+
def load(kind=None):
    """Return an Index instance according to the configuration.

    kind selects the backend.  When it is empty, the lower-cased
    `search_backend` configuration setting is used instead.  The
    value 'sqlalchemy' selects the bundled SQLAlchemy backend; any
    other value is imported as a module, and an instance of that
    module's `Index` class is returned.

    Raises NotImplementedError if no backend is named or the named
    module cannot be imported.
    """
    if not kind:
        # Only consult the configuration when the caller did not
        # choose a backend explicitly.
        cfg = config.load()
        kind = (cfg.search_backend or '').lower()
    if kind == 'sqlalchemy':
        from . import db
        return db.SQLAlchemyIndex()
    if not kind:
        # importlib would raise ValueError for an empty module name;
        # report it like any other unknown backend instead.
        raise NotImplementedError('Unknown index type {0!r}'.format(kind))
    try:
        module = importlib.import_module(kind)
    except ImportError as error:
        # Keep the exception type callers expect, but include the
        # import failure so a broken backend module can be diagnosed.
        raise NotImplementedError(
            'Unknown index type {0!r} ({1})'.format(kind, error))
    return module.Index()

lib/index/db.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""An SQLAlchemy backend for the search endpoint
2+
"""
3+
4+
import sqlalchemy
5+
import sqlalchemy.exc
6+
import sqlalchemy.ext.declarative
7+
import sqlalchemy.orm
8+
import sqlalchemy.sql.functions
9+
10+
import config
11+
import storage
12+
13+
from . import Index
14+
15+
16+
Base = sqlalchemy.ext.declarative.declarative_base()
17+
18+
19+
class Version (Base):
    """Schema version for the search-index database"""
    __tablename__ = 'version'

    # The schema version number doubles as the primary key.
    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)

    def __repr__(self):
        class_name = type(self).__name__
        return '<{0}(id={1})>'.format(class_name, self.id)
27+
28+
29+
class Repository (Base):
    """Repository description"""
    __tablename__ = 'repository'

    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # Required and unique repository name.
    name = sqlalchemy.Column(
        sqlalchemy.String, nullable=False, unique=True)
    # Optional free-form description.
    description = sqlalchemy.Column(sqlalchemy.String)

    def __repr__(self):
        template = "<{0}(name='{1}', description='{2}')>"
        class_name = type(self).__name__
        return template.format(class_name, self.name, self.description)
41+
42+
43+
class SQLAlchemyIndex (Index):
    """Maintain an index of repository data in an SQLAlchemy database

    Each repository is stored as a row of the 'repository' table,
    holding a '{namespace}/{repository}' name and a description
    string.  The 'version' table records the schema version of the
    index.
    """
    def __init__(self, database=None):
        """Open (and, if needed, build) the index database

        database is passed to sqlalchemy.create_engine.  When it is
        None, the `sqlalchemy_index_database` configuration setting is
        used instead.
        """
        if database is None:
            cfg = config.load()
            database = cfg.sqlalchemy_index_database
        self._engine = sqlalchemy.create_engine(database)
        self._session = sqlalchemy.orm.sessionmaker(bind=self._engine)
        self.version = 1
        self._setup_database()
        # Connect the signal handlers only once the database is usable.
        super(SQLAlchemyIndex, self).__init__()

    def _setup_database(self):
        """Check the stored schema version, building the index if unset

        Raises NotImplementedError if the database holds a schema
        version other than self.version.
        """
        session = self._session()
        try:
            try:
                version = session.query(
                    sqlalchemy.sql.functions.max(Version.id)).first()[0]
            except sqlalchemy.exc.OperationalError:
                # Presumably the tables do not exist yet; treat the
                # database as uninitialized.
                version = None
            if version:
                if version != self.version:
                    raise NotImplementedError(
                        'unrecognized search index version {0}'.format(
                            version))
            else:
                self._generate_index(session=session)
        finally:
            # Release the session even when the version check raises.
            session.close()

    def _generate_index(self, session):
        """Create the tables and fill them from the storage backend

        The caller owns session and is responsible for closing it.
        """
        store = storage.load()
        Base.metadata.create_all(self._engine)
        session.add(Version(id=self.version))
        for repository in self._walk_storage(store=store):
            session.add(Repository(**repository))
        session.commit()

    def _handle_repository_created(
            self, sender, namespace, repository, value):
        """Add a row for a newly created repository"""
        name = '{0}/{1}'.format(namespace, repository)
        description = ''  # TODO(wking): store descriptions
        session = self._session()
        try:
            session.add(Repository(name=name, description=description))
            session.commit()
        finally:
            # Close the session even if the commit fails.
            session.close()

    def _handle_repository_updated(
            self, sender, namespace, repository, value):
        """Update the stored description of an existing repository"""
        name = '{0}/{1}'.format(namespace, repository)
        description = ''  # TODO(wking): store descriptions
        session = self._session()
        try:
            session.query(Repository).filter(
                Repository.name == name).update(
                    values={'description': description},
                    synchronize_session=False)
            session.commit()
        finally:
            session.close()

    def _handle_repository_deleted(self, sender, namespace, repository):
        """Remove the row of a deleted repository"""
        name = '{0}/{1}'.format(namespace, repository)
        session = self._session()
        try:
            session.query(Repository).filter(
                Repository.name == name).delete()
            session.commit()
        finally:
            session.close()

    def results(self, search_term):
        """Return repositories whose name or description match

        A repository matches when search_term occurs anywhere in its
        name or description (SQL LIKE '%term%').  Returns a list of
        dictionaries:

          {'name': name, 'description': description}
        """
        session = self._session()
        try:
            # Explicit field index: auto-numbered '{}' fields fail on
            # Python 2.6 and the rest of this module uses '{0}'.
            like_term = '%{0}%'.format(search_term)
            repositories = session.query(Repository).filter(
                sqlalchemy.sql.or_(
                    Repository.name.like(like_term),
                    Repository.description.like(like_term)))
            # The query is lazy; build the list before the session is
            # closed.
            return [
                {
                    'name': repo.name,
                    'description': repo.description,
                }
                for repo in repositories]
        finally:
            # The original never closed this session, leaking one per
            # search request.
            session.close()

registry/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .app import app
55
from .tags import *
66
from .images import *
7+
from . import search as search
78
from .status import *
89

910
import config

registry/index.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,3 @@ def delete_repository_images(namespace, repository):
133133
@toolkit.parse_repository_name
134134
def put_repository_auth(namespace, repository):
135135
return toolkit.response('OK')
136-
137-
138-
@app.route('/v1/search', methods=['GET'])
139-
def get_search():
140-
return toolkit.response({})

registry/search.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import flask
2+
3+
import config
4+
import lib.index as index
5+
6+
from . import toolkit
7+
from .app import app
8+
9+
10+
cfg = config.load()

# Build the search index only when a backend is configured; otherwise
# INDEX stays None and searches return no results.
INDEX = None
if cfg.search_backend:
    INDEX = index.load(cfg.search_backend.lower())
17+
18+
19+
@app.route('/v1/search', methods=['GET'])
def get_search():
    """Answer GET /v1/search using the configured index

    The search term is read from the `q` query argument (default '').
    When no index is configured, the result list is empty.
    """
    search_term = flask.request.args.get('q', '')
    results = [] if INDEX is None else INDEX.results(search_term=search_term)
    payload = {
        'query': search_term,
        'num_results': len(results),
        'results': results,
    }
    return toolkit.response(payload)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ python-keystoneclient==0.3.1
1212
python-swiftclient==1.8.0
1313
redis==2.8.0
1414
rsa==3.1.2
15+
sqlalchemy==0.9.2
1516
bugsnag==1.3.1
1617
gcs-oauth2-boto-plugin>=1.3

run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ GUNICORN_GRACEFUL_TIMEOUT=${GUNICORN_GRACEFUL_TIMEOUT:-3600}
66
GUNICORN_SILENT_TIMEOUT=${GUNICORN_SILENT_TIMEOUT:-3600}
77

88
cd "$(dirname $0)"
9-
exec gunicorn --access-logfile - --debug --max-requests 100 --graceful-timeout $GUNICORN_GRACEFUL_TIMEOUT -t $GUNICORN_SILENT_TIMEOUT -k gevent -b 0.0.0.0:$REGISTRY_PORT -w $GUNICORN_WORKERS wsgi:application
9+
exec gunicorn --access-logfile - --max-requests 100 --graceful-timeout $GUNICORN_GRACEFUL_TIMEOUT -t $GUNICORN_SILENT_TIMEOUT -k gevent -b 0.0.0.0:$REGISTRY_PORT -w $GUNICORN_WORKERS $GUNICORN_OPTS "$@" wsgi:application

0 commit comments

Comments
 (0)