Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.

Commit fab832b

Browse files
committed
search: Use SQLAlchemy as a search-index backend
This avoids storing search-index state in the local process (as the previous Python-dict-based implementation did). Multiple registry processes (e.g. gunicorn workers) can now use the same search-index database and stay in sync with each other.
1 parent 534c6da commit fab832b

File tree

6 files changed

+140
-64
lines changed

6 files changed

+140
-64
lines changed

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ in action in the example below...
4646

4747
common:
4848
loglevel: info
49+
search_index: "_env:SEARCH_INDEX:"
4950

5051
prod:
5152
loglevel: warn
@@ -204,6 +205,22 @@ dev:
204205
gs_secure: false
205206
```
206207

208+
### Search-engine options
209+
210+
The Docker Registry can optionally use [SQLAlchemy][] to index
211+
repository information in a database, for the `GET /v1/search`
212+
[endpoint][search-endpoint]. You can configure the backend with a
213+
configuration like:
214+
215+
```yaml
216+
search_index: "_env:SEARCH_INDEX:"
217+
```
218+
219+
You can use the `SEARCH_INDEX` environment variable to configure the
220+
string passed to [create_engine][]. If the configured `search_index`
221+
is empty, no index is built, and the search endpoint always returns
222+
empty results.
223+
207224
### Email options
208225

209226
Setting these options makes the Registry send an email on each code Exception:
@@ -404,3 +421,9 @@ pip install tox
404421
cd docker-registry/
405422
tox
406423
```
424+
425+
[SQLAlchemy]: http://docs.sqlalchemy.org/
426+
[search-endpoint]:
427+
http://docs.docker.io/en/latest/reference/api/index_api/#get--v1-search
428+
[create_engine]:
429+
http://docs.sqlalchemy.org/en/latest/core/engines.html#sqlalchemy.create_engine

config/config_sample.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ common:
3131
# Let gunicorn set this environment variable or set a random string here
3232
secret_key: _env:SECRET_KEY
3333

34+
search_index: "_env:SEARCH_INDEX:"
35+
3436

3537
# This is the default configuration when no flavor is specified
3638
dev:

config/config_test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ test:
1414
gs_access_key: _env:GS_ACCESS_KEY
1515
gs_secret_key: _env:GS_SECRET_KEY
1616
gs_secure: false
17+
18+
search_index: sqlite:////tmp/docker-registry.db

lib/storage/__init__.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,13 @@ class Storage(object):
1212
"""Storage is organized as follow:
1313
$ROOT/images/<image_id>/json
1414
$ROOT/images/<image_id>/layer
15-
$ROOT/index/json
1615
$ROOT/repositories/<namespace>/<repository_name>/<tag_name>
1716
"""
1817

1918
# Useful if we want to change those locations later without rewriting
2019
# the code which uses Storage
2120
repositories = 'repositories'
2221
images = 'images'
23-
index = 'index'
2422
# Set the IO buffer to 128kB
2523
buffer_size = 128 * 1024
2624
# By default no storage plugin supports it
@@ -87,9 +85,6 @@ def private_flag_path(self, namespace, repository):
8785
def is_private(self, namespace, repository):
8886
return self.exists(self.private_flag_path(namespace, repository))
8987

90-
def index_path(self):
91-
return '{0}/json'.format(self.index)
92-
9388
def get_content(self, path):
9489
raise NotImplementedError
9590

registry/search.py

Lines changed: 112 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,56 @@
11
__all__ = ['get_search']
22

33
import flask
4-
import simplejson as json
5-
4+
try:
5+
import sqlalchemy
6+
import sqlalchemy.exc
7+
import sqlalchemy.ext.declarative
8+
import sqlalchemy.orm
9+
import sqlalchemy.sql.functions
10+
except ImportError as e:
11+
_sqlalchemy_import_error = e
12+
sqlalchemy = None
13+
14+
import config
615
import signals
716
import storage
817
import toolkit
918

1019
from .app import app
1120

1221

22+
cfg = config.load()
23+
index = None
1324
store = storage.load()
14-
#index = Index()
1525

1626

17-
class Index (dict):
27+
if sqlalchemy:
28+
Base = sqlalchemy.ext.declarative.declarative_base()
29+
30+
class Version (Base):
31+
"Schema version for the search-index database"
32+
__tablename__ = 'version'
33+
34+
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
35+
36+
def __repr__(self):
37+
return '<{0}(id={1})>'.format(type(self).__name__, self.id)
38+
39+
class Repository (Base):
40+
"Repository description"
41+
__tablename__ = 'repository'
42+
43+
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
44+
name = sqlalchemy.Column(
45+
sqlalchemy.String, nullable=False, unique=True)
46+
description = sqlalchemy.Column(sqlalchemy.String)
47+
48+
def __repr__(self):
49+
return "<{0}(name='{1}', description='{2}')>".format(
50+
type(self).__name__, self.name, self.description)
51+
52+
53+
class Index (object):
1854
"""Maintain an index of repository data
1955
2056
The index is a dictionary. The keys are
@@ -23,43 +59,33 @@ class Index (dict):
2359
2460
index['library/ubuntu'] = 'An ubuntu image...'
2561
"""
26-
def __init__(self):
27-
super(Index, self).__init__()
62+
def __init__(self, database):
63+
self._engine = sqlalchemy.create_engine(database)
64+
self._session = sqlalchemy.orm.sessionmaker(bind=self._engine)
2865
self.version = 1
29-
self.load()
66+
self._setup_database()
3067
signals.repository_created.connect(self._handler_repository_created)
31-
signals.repository_updated.connect(self._handler_repository_created)
68+
signals.repository_updated.connect(self._handler_repository_updated)
3269
signals.repository_deleted.connect(self._handler_repository_deleted)
3370

34-
def load(self):
35-
regenerated = False
71+
def _setup_database(self):
72+
session = self._session()
3673
try:
37-
index_content = store.get_content(store.index_path())
38-
except (OSError, IOError):
39-
index_data = self._regenerate_index()
40-
regenerated = True
41-
else:
42-
data = json.loads(index_content)
43-
if data['version'] != self.version:
74+
version = session.query(
75+
sqlalchemy.sql.functions.max(Version.id)).first()[0]
76+
except sqlalchemy.exc.OperationalError:
77+
version = None
78+
if version:
79+
if version != self.version:
4480
raise NotImplementedError(
45-
'unrecognized search index version {0}'.format(
46-
data['version']))
47-
index_data = data['index']
48-
self.clear()
49-
self.update(index_data)
50-
if regenerated:
51-
self.save()
52-
53-
def save(self):
54-
index_data = {
55-
'version': self.version,
56-
'index': dict(self),
57-
}
58-
store.put_content(store.index_path(), json.dumps(index_data))
59-
60-
def _regenerate_index(self):
61-
index_data = {}
62-
description = '' # TODO(wking): store descriptions
81+
'unrecognized search index version {0}'.format(version))
82+
else:
83+
self._generate_index(session=session)
84+
session.close()
85+
86+
def _generate_index(self, session):
87+
Base.metadata.create_all(self._engine)
88+
session.add(Version(id=self.version))
6389
try:
6490
namespace_paths = list(
6591
store.list_directory(path=store.repositories))
@@ -74,41 +100,68 @@ def _regenerate_index(self):
74100
repository_paths = []
75101
for path in repository_paths:
76102
repository = path.rsplit('/', 1)[-1]
77-
key = '{0}/{1}'.format(namespace, repository)
78-
index_data[key] = description
79-
return index_data
103+
name = '{0}/{1}'.format(namespace, repository)
104+
description = None # TODO(wking): store descriptions
105+
session.add(Repository(name=name, description=description))
106+
session.commit()
80107

81108
def _handler_repository_created(
82109
self, sender, namespace, repository, value):
83-
key = '{0}/{1}'.format(namespace, repository)
110+
name = '{0}/{1}'.format(namespace, repository)
84111
description = '' # TODO(wking): store descriptions
85-
self[key] = description
86-
self.save()
87-
88-
def _handler_repository_deleted(self, sender, namespace, repository):
89-
key = '{0}/{1}'.format(namespace, repository)
90-
try:
91-
self.pop(key)
92-
except KeyError:
93-
pass
94-
else:
95-
self.save()
112+
session = self._session()
113+
session.add(Repository(name=name, description=description))
114+
session.commit()
115+
session.close()
96116

117+
def _handler_repository_updated(
118+
self, sender, namespace, repository, value):
119+
name = '{0}/{1}'.format(namespace, repository)
120+
description = '' # TODO(wking): store descriptions
121+
session = self._session()
122+
session.query(Repository).filter(
123+
Repository.name == name).update(
124+
values={'description': description},
125+
synchronize_session=False)
126+
session.commit()
127+
session.close()
97128

98-
index = Index()
129+
def _handler_repository_deleted(self, sender, namespace, repository):
130+
name = '{0}/{1}'.format(namespace, repository)
131+
session = self._session()
132+
session.query(Repository).filter(Repository.name == name).delete()
133+
session.commit()
134+
session.close()
135+
136+
def results(self, search_term):
137+
session = self._session()
138+
like_term = '%{}%'.format(search_term)
139+
repositories = session.query(Repository).filter(
140+
sqlalchemy.sql.or_(
141+
Repository.name.like(like_term),
142+
Repository.description.like(like_term)))
143+
return [
144+
{
145+
'name': repo.name,
146+
'description': repo.description,
147+
}
148+
for repo in repositories]
149+
150+
151+
# Enable the search index
152+
if cfg.search_index:
153+
if not sqlalchemy:
154+
raise _sqlalchemy_import_error
155+
index = Index(database=cfg.search_index)
99156

100157

101158
@app.route('/v1/search', methods=['GET'])
102159
def get_search():
103160
search_term = flask.request.args.get('q', '')
104-
results = [
105-
{
106-
'name': name,
107-
'description': description,
108-
}
109-
for name, description in index.items()
110-
if search_term in name
111-
or search_term in description]
161+
if index is None:
162+
results = []
163+
else:
164+
results = index.results(search_term=search_term)
112165
return toolkit.response({
113166
'query': search_term,
114167
'num_results': len(results),

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ python-keystoneclient==0.3.1
1212
python-swiftclient==1.8.0
1313
redis==2.8.0
1414
rsa==3.1.2
15+
sqlalchemy==0.9.2
1516
bugsnag==1.3.1
1617
gcs-oauth2-boto-plugin>=1.3

0 commit comments

Comments
 (0)